summaryrefslogtreecommitdiff
path: root/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms')
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp29
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp5
-rw-r--r--lib/Transforms/IPO/FunctionImport.cpp2
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp6
-rw-r--r--lib/Transforms/IPO/LLVMBuild.txt2
-rw-r--r--lib/Transforms/IPO/MergeFunctions.cpp24
-rw-r--r--lib/Transforms/IPO/PartialInlining.cpp391
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp15
-rw-r--r--lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp35
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp12
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp43
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp18
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp13
-rw-r--r--lib/Transforms/InstCombine/InstCombineInternal.h19
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp19
-rw-r--r--lib/Transforms/InstCombine/InstCombinePHI.cpp2
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp3
-rw-r--r--lib/Transforms/InstCombine/InstCombineShifts.cpp6
-rw-r--r--lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp10
-rw-r--r--lib/Transforms/InstCombine/InstCombineVectorOps.cpp8
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp58
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp169
-rw-r--r--lib/Transforms/Instrumentation/IndirectCallPromotion.cpp24
-rw-r--r--lib/Transforms/Instrumentation/MemorySanitizer.cpp4
-rw-r--r--lib/Transforms/ObjCARC/ObjCARC.h13
-rw-r--r--lib/Transforms/ObjCARC/PtrState.cpp36
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt1
-rw-r--r--lib/Transforms/Scalar/CorrelatedValuePropagation.cpp17
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp49
-rw-r--r--lib/Transforms/Scalar/GVN.cpp2
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp31
-rw-r--r--lib/Transforms/Scalar/InferAddressSpaces.cpp85
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp62
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp4
-rw-r--r--lib/Transforms/Scalar/LoopInstSimplify.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp12
-rw-r--r--lib/Transforms/Scalar/LoopSimplifyCFG.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp53
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp12
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp2
-rw-r--r--lib/Transforms/Scalar/NaryReassociate.cpp10
-rw-r--r--lib/Transforms/Scalar/NewGVN.cpp36
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp4
-rw-r--r--lib/Transforms/Scalar/RewriteStatepointsForGC.cpp72
-rw-r--r--lib/Transforms/Scalar/SROA.cpp26
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp2
-rw-r--r--lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp2
-rw-r--r--lib/Transforms/Scalar/SimpleLoopUnswitch.cpp626
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp4
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp4
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp4
-rw-r--r--lib/Transforms/Utils/LibCallsShrinkWrap.cpp74
-rw-r--r--lib/Transforms/Utils/Local.cpp60
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp4
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp4
-rw-r--r--lib/Transforms/Utils/ModuleUtils.cpp32
-rw-r--r--lib/Transforms/Utils/PromoteMemoryToRegister.cpp9
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp6
-rw-r--r--lib/Transforms/Utils/SimplifyIndVar.cpp8
-rw-r--r--lib/Transforms/Utils/SimplifyInstructions.cpp3
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp27
61 files changed, 1708 insertions, 609 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index a2c8a32dfe866..25db0eff8848e 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -106,9 +106,9 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
AttributeList PAL = F->getAttributes();
// First, determine the new argument list
- unsigned ArgIndex = 0;
+ unsigned ArgNo = 0;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
- ++I, ++ArgIndex) {
+ ++I, ++ArgNo) {
if (ByValArgsToTransform.count(&*I)) {
// Simple byval argument? Just add all the struct element types.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
@@ -120,7 +120,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
} else if (!ArgsToPromote.count(&*I)) {
// Unchanged argument
Params.push_back(I->getType());
- ArgAttrVec.push_back(PAL.getParamAttributes(ArgIndex));
+ ArgAttrVec.push_back(PAL.getParamAttributes(ArgNo));
} else if (I->use_empty()) {
// Dead argument (which are always marked as promotable)
++NumArgumentsDead;
@@ -214,12 +214,12 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Loop over the operands, inserting GEP and loads in the caller as
// appropriate.
CallSite::arg_iterator AI = CS.arg_begin();
- ArgIndex = 1;
+ ArgNo = 0;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
- ++I, ++AI, ++ArgIndex)
+ ++I, ++AI, ++ArgNo)
if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
Args.push_back(*AI); // Unmodified argument
- ArgAttrVec.push_back(CallPAL.getAttributes(ArgIndex));
+ ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
} else if (ByValArgsToTransform.count(&*I)) {
// Emit a GEP and load for each element of the struct.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
@@ -280,9 +280,9 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
}
// Push any varargs arguments on the list.
- for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
+ for (; AI != CS.arg_end(); ++AI, ++ArgNo) {
Args.push_back(*AI);
- ArgAttrVec.push_back(CallPAL.getAttributes(ArgIndex));
+ ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
}
SmallVector<OperandBundleDef, 1> OpBundles;
@@ -839,17 +839,12 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
// avoiding a register copy.
if (PtrArg->hasStructRetAttr()) {
unsigned ArgNo = PtrArg->getArgNo();
- F->setAttributes(
- F->getAttributes()
- .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet)
- .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+ F->removeAttribute(ArgNo + 1, Attribute::StructRet);
+ F->addAttribute(ArgNo + 1, Attribute::NoAlias);
for (Use &U : F->uses()) {
CallSite CS(U.getUser());
- CS.setAttributes(
- CS.getAttributes()
- .removeAttribute(F->getContext(), ArgNo + 1,
- Attribute::StructRet)
- .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+ CS.removeAttribute(ArgNo + 1, Attribute::StructRet);
+ CS.addAttribute(ArgNo + 1, Attribute::NoAlias);
}
}
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 9648883b7f275..031c3d8a9ebed 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -855,10 +855,11 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
bool MadeChange = false;
for (Function *F : SCCNodes) {
- if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy())
+ if (F->doesNotAlias(AttributeList::ReturnIndex) ||
+ !F->getReturnType()->isPointerTy())
continue;
- F->setDoesNotAlias(0);
+ F->setDoesNotAlias(AttributeList::ReturnIndex);
++NumNoAlias;
MadeChange = true;
}
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
index d66411f04cc4a..c7ef2494e3b8b 100644
--- a/lib/Transforms/IPO/FunctionImport.cpp
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -25,7 +26,6 @@
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/Linker.h"
#include "llvm/Object/IRObjectFile.h"
-#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/SourceMgr.h"
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index ae9d4ce11e0db..f277a51ae659a 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -239,7 +239,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
// we delete a constant array, we may also be holding pointer to one of its
// elements (or an element of one of its elements if we're dealing with an
// array of arrays) in the worklist.
- SmallVector<WeakVH, 8> WorkList(V->user_begin(), V->user_end());
+ SmallVector<WeakTrackingVH, 8> WorkList(V->user_begin(), V->user_end());
while (!WorkList.empty()) {
Value *UV = WorkList.pop_back_val();
if (!UV)
@@ -1792,7 +1792,9 @@ static void makeAllConstantUsesInstructions(Constant *C) {
NewU->insertBefore(UI);
UI->replaceUsesOfWith(U, NewU);
}
- U->dropAllReferences();
+ // We've replaced all the uses, so destroy the constant. (destroyConstant
+ // will update value handles and metadata.)
+ U->destroyConstant();
}
}
diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt
index 9c83f88b2210c..a8b0f32fd785e 100644
--- a/lib/Transforms/IPO/LLVMBuild.txt
+++ b/lib/Transforms/IPO/LLVMBuild.txt
@@ -20,4 +20,4 @@ type = Library
name = IPO
parent = Transforms
library_name = ipo
-required_libraries = Analysis BitWriter Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation
+required_libraries = Analysis BitReader BitWriter Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 771770ddc0600..0e478ba607be2 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -207,11 +207,13 @@ private:
/// A work queue of functions that may have been modified and should be
/// analyzed again.
- std::vector<WeakVH> Deferred;
+ std::vector<WeakTrackingVH> Deferred;
/// Checks the rules of order relation introduced among functions set.
/// Returns true, if sanity check has been passed, and false if failed.
- bool doSanityCheck(std::vector<WeakVH> &Worklist);
+#ifndef NDEBUG
+ bool doSanityCheck(std::vector<WeakTrackingVH> &Worklist);
+#endif
/// Insert a ComparableFunction into the FnTree, or merge it away if it's
/// equal to one that's already present.
@@ -283,7 +285,8 @@ ModulePass *llvm::createMergeFunctionsPass() {
return new MergeFunctions();
}
-bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
+#ifndef NDEBUG
+bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) {
if (const unsigned Max = NumFunctionsForSanityCheck) {
unsigned TripleNumber = 0;
bool Valid = true;
@@ -291,10 +294,12 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n";
unsigned i = 0;
- for (std::vector<WeakVH>::iterator I = Worklist.begin(), E = Worklist.end();
+ for (std::vector<WeakTrackingVH>::iterator I = Worklist.begin(),
+ E = Worklist.end();
I != E && i < Max; ++I, ++i) {
unsigned j = i;
- for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) {
+ for (std::vector<WeakTrackingVH>::iterator J = I; J != E && j < Max;
+ ++J, ++j) {
Function *F1 = cast<Function>(*I);
Function *F2 = cast<Function>(*J);
int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare();
@@ -312,7 +317,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
continue;
unsigned k = j;
- for (std::vector<WeakVH>::iterator K = J; K != E && k < Max;
+ for (std::vector<WeakTrackingVH>::iterator K = J; K != E && k < Max;
++k, ++K, ++TripleNumber) {
if (K == J)
continue;
@@ -351,6 +356,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
}
return true;
}
+#endif
bool MergeFunctions::runOnModule(Module &M) {
if (skipModule(M))
@@ -381,12 +387,12 @@ bool MergeFunctions::runOnModule(Module &M) {
// consider merging it. Otherwise it is dropped and never considered again.
if ((I != S && std::prev(I)->first == I->first) ||
(std::next(I) != IE && std::next(I)->first == I->first) ) {
- Deferred.push_back(WeakVH(I->second));
+ Deferred.push_back(WeakTrackingVH(I->second));
}
}
do {
- std::vector<WeakVH> Worklist;
+ std::vector<WeakTrackingVH> Worklist;
Deferred.swap(Worklist);
DEBUG(doSanityCheck(Worklist));
@@ -395,7 +401,7 @@ bool MergeFunctions::runOnModule(Module &M) {
DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
// Insert functions and merge them.
- for (WeakVH &I : Worklist) {
+ for (WeakTrackingVH &I : Worklist) {
if (!I)
continue;
Function *F = cast<Function>(I);
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 78e71c18fe290..1bb9d654ec1cc 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -16,8 +16,12 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
@@ -31,13 +35,18 @@ using namespace llvm;
#define DEBUG_TYPE "partial-inlining"
-STATISTIC(NumPartialInlined, "Number of functions partially inlined");
+STATISTIC(NumPartialInlined,
+ "Number of callsites functions partially inlined into.");
// Command line option to disable partial-inlining. The default is false:
static cl::opt<bool>
DisablePartialInlining("disable-partial-inlining", cl::init(false),
cl::Hidden, cl::desc("Disable partial ininling"));
+static cl::opt<unsigned> MaxNumInlineBlocks(
+ "max-num-inline-blocks", cl::init(5), cl::Hidden,
+ cl::desc("Max Number of Blocks To be Partially Inlined"));
+
// Command line option to set the maximum number of partial inlining allowed
// for the module. The default value of -1 means no limit.
static cl::opt<int> MaxNumPartialInlining(
@@ -45,20 +54,52 @@ static cl::opt<int> MaxNumPartialInlining(
cl::desc("Max number of partial inlining. The default is unlimited"));
namespace {
+
+struct FunctionOutliningInfo {
+ FunctionOutliningInfo()
+ : Entries(), ReturnBlock(nullptr), NonReturnBlock(nullptr),
+ ReturnBlockPreds() {}
+ // Returns the number of blocks to be inlined including all blocks
+ // in Entries and one return block.
+ unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }
+
+ // A set of blocks including the function entry that guard
+ // the region to be outlined.
+ SmallVector<BasicBlock *, 4> Entries;
+ // The return block that is not included in the outlined region.
+ BasicBlock *ReturnBlock;
+ // The dominating block of the region ot be outlined.
+ BasicBlock *NonReturnBlock;
+ // The set of blocks in Entries that are predecessors to ReturnBlock
+ SmallVector<BasicBlock *, 4> ReturnBlockPreds;
+};
+
struct PartialInlinerImpl {
- PartialInlinerImpl(InlineFunctionInfo IFI) : IFI(std::move(IFI)) {}
+ PartialInlinerImpl(
+ std::function<AssumptionCache &(Function &)> *GetAC,
+ std::function<TargetTransformInfo &(Function &)> *GTTI,
+ Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
+ ProfileSummaryInfo *ProfSI)
+ : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
bool run(Module &M);
Function *unswitchFunction(Function *F);
+ std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
+
private:
- InlineFunctionInfo IFI;
int NumPartialInlining = 0;
+ std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
+ std::function<TargetTransformInfo &(Function &)> *GetTTI;
+ Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
+ ProfileSummaryInfo *PSI;
+ bool shouldPartialInline(CallSite CS, OptimizationRemarkEmitter &ORE);
bool IsLimitReached() {
return (MaxNumPartialInlining != -1 &&
NumPartialInlining >= MaxNumPartialInlining);
}
};
+
struct PartialInlinerLegacyPass : public ModulePass {
static char ID; // Pass identification, replacement for typeid
PartialInlinerLegacyPass() : ModulePass(ID) {
@@ -67,91 +108,319 @@ struct PartialInlinerLegacyPass : public ModulePass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
+ TargetTransformInfoWrapperPass *TTIWP =
+ &getAnalysis<TargetTransformInfoWrapperPass>();
+ ProfileSummaryInfo *PSI =
+ getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+
std::function<AssumptionCache &(Function &)> GetAssumptionCache =
[&ACT](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
};
- InlineFunctionInfo IFI(nullptr, &GetAssumptionCache);
- return PartialInlinerImpl(IFI).run(M);
+
+ std::function<TargetTransformInfo &(Function &)> GetTTI =
+ [&TTIWP](Function &F) -> TargetTransformInfo & {
+ return TTIWP->getTTI(F);
+ };
+
+ return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M);
}
};
}
-Function *PartialInlinerImpl::unswitchFunction(Function *F) {
- // First, verify that this function is an unswitching candidate...
- if (F->hasAddressTaken())
- return nullptr;
-
+std::unique_ptr<FunctionOutliningInfo>
+PartialInlinerImpl::computeOutliningInfo(Function *F) {
BasicBlock *EntryBlock = &F->front();
BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
if (!BR || BR->isUnconditional())
- return nullptr;
+ return std::unique_ptr<FunctionOutliningInfo>();
+
+ // Returns true if Succ is BB's successor
+ auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
+ return is_contained(successors(BB), Succ);
+ };
+
+ auto SuccSize = [](BasicBlock *BB) {
+ return std::distance(succ_begin(BB), succ_end(BB));
+ };
+
+ auto IsReturnBlock = [](BasicBlock *BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ return isa<ReturnInst>(TI);
+ };
+
+ auto GetReturnBlock = [=](BasicBlock *Succ1, BasicBlock *Succ2) {
+ if (IsReturnBlock(Succ1))
+ return std::make_tuple(Succ1, Succ2);
+ if (IsReturnBlock(Succ2))
+ return std::make_tuple(Succ2, Succ1);
+
+ return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
+ };
+
+ // Detect a triangular shape:
+ auto GetCommonSucc = [=](BasicBlock *Succ1, BasicBlock *Succ2) {
+ if (IsSuccessor(Succ1, Succ2))
+ return std::make_tuple(Succ1, Succ2);
+ if (IsSuccessor(Succ2, Succ1))
+ return std::make_tuple(Succ2, Succ1);
+
+ return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
+ };
+
+ std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
+ llvm::make_unique<FunctionOutliningInfo>();
+
+ BasicBlock *CurrEntry = EntryBlock;
+ bool CandidateFound = false;
+ do {
+ // The number of blocks to be inlined has already reached
+ // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
+ // disables partial inlining for the function.
+ if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
+ break;
+
+ if (SuccSize(CurrEntry) != 2)
+ break;
+
+ BasicBlock *Succ1 = *succ_begin(CurrEntry);
+ BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
+
+ BasicBlock *ReturnBlock, *NonReturnBlock;
+ std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
+
+ if (ReturnBlock) {
+ OutliningInfo->Entries.push_back(CurrEntry);
+ OutliningInfo->ReturnBlock = ReturnBlock;
+ OutliningInfo->NonReturnBlock = NonReturnBlock;
+ CandidateFound = true;
+ break;
+ }
+
+ BasicBlock *CommSucc;
+ BasicBlock *OtherSucc;
+ std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
+
+ if (!CommSucc)
+ break;
+
+ OutliningInfo->Entries.push_back(CurrEntry);
+ CurrEntry = OtherSucc;
+
+ } while (true);
+
+ if (!CandidateFound)
+ return std::unique_ptr<FunctionOutliningInfo>();
+
+ // Do sanity check of the entries: there should not
+ // be any successors (not in the entry set) other than
+ // {ReturnBlock, NonReturnBlock}
+ assert(OutliningInfo->Entries[0] == &F->front());
+ DenseSet<BasicBlock *> Entries;
+ for (BasicBlock *E : OutliningInfo->Entries)
+ Entries.insert(E);
+
+ // Returns true if BB has a predecessor which is not
+ // in the Entries set.
+ auto HasNonEntryPred = [Entries](BasicBlock *BB) {
+ for (auto Pred : predecessors(BB)) {
+ if (!Entries.count(Pred))
+ return true;
+ }
+ return false;
+ };
+ auto CheckAndNormalizeCandidate =
+ [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
+ for (BasicBlock *E : OutliningInfo->Entries) {
+ for (auto Succ : successors(E)) {
+ if (Entries.count(Succ))
+ continue;
+ if (Succ == OutliningInfo->ReturnBlock)
+ OutliningInfo->ReturnBlockPreds.push_back(E);
+ else if (Succ != OutliningInfo->NonReturnBlock)
+ return false;
+ }
+ // There should not be any outside incoming edges either:
+ if (HasNonEntryPred(E))
+ return false;
+ }
+ return true;
+ };
+
+ if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
+ return std::unique_ptr<FunctionOutliningInfo>();
- BasicBlock *ReturnBlock = nullptr;
- BasicBlock *NonReturnBlock = nullptr;
- unsigned ReturnCount = 0;
- for (BasicBlock *BB : successors(EntryBlock)) {
- if (isa<ReturnInst>(BB->getTerminator())) {
- ReturnBlock = BB;
- ReturnCount++;
- } else
- NonReturnBlock = BB;
+ // Now further grow the candidate's inlining region by
+ // peeling off dominating blocks from the outlining region:
+ while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
+ BasicBlock *Cand = OutliningInfo->NonReturnBlock;
+ if (SuccSize(Cand) != 2)
+ break;
+
+ if (HasNonEntryPred(Cand))
+ break;
+
+ BasicBlock *Succ1 = *succ_begin(Cand);
+ BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
+
+ BasicBlock *ReturnBlock, *NonReturnBlock;
+ std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
+ if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
+ break;
+
+ if (NonReturnBlock->getSinglePredecessor() != Cand)
+ break;
+
+ // Now grow and update OutliningInfo:
+ OutliningInfo->Entries.push_back(Cand);
+ OutliningInfo->NonReturnBlock = NonReturnBlock;
+ OutliningInfo->ReturnBlockPreds.push_back(Cand);
+ Entries.insert(Cand);
}
- if (ReturnCount != 1)
+ return OutliningInfo;
+}
+
+bool PartialInlinerImpl::shouldPartialInline(CallSite CS,
+ OptimizationRemarkEmitter &ORE) {
+ // TODO : more sharing with shouldInline in Inliner.cpp
+ using namespace ore;
+ Instruction *Call = CS.getInstruction();
+ Function *Callee = CS.getCalledFunction();
+ Function *Caller = CS.getCaller();
+ auto &CalleeTTI = (*GetTTI)(*Callee);
+ InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
+ *GetAssumptionCache, GetBFI, PSI);
+
+ if (IC.isAlways()) {
+ ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
+ << NV("Callee", Callee)
+ << " should always be fully inlined, not partially");
+ return false;
+ }
+
+ if (IC.isNever()) {
+ ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
+ << NV("Callee", Callee) << " not partially inlined into "
+ << NV("Caller", Caller)
+ << " because it should never be inlined (cost=never)");
+ return false;
+ }
+
+ if (!IC) {
+ ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
+ << NV("Callee", Callee) << " not partially inlined into "
+ << NV("Caller", Caller) << " because too costly to inline (cost="
+ << NV("Cost", IC.getCost()) << ", threshold="
+ << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
+ return false;
+ }
+
+ ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
+ << NV("Callee", Callee) << " can be partially inlined into "
+ << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
+ << " (threshold="
+ << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
+ return true;
+}
+
+Function *PartialInlinerImpl::unswitchFunction(Function *F) {
+
+ if (F->hasAddressTaken())
+ return nullptr;
+
+ std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
+ computeOutliningInfo(F);
+
+ if (!OutliningInfo)
return nullptr;
// Clone the function, so that we can hack away on it.
ValueToValueMapTy VMap;
Function *DuplicateFunction = CloneFunction(F, VMap);
- DuplicateFunction->setLinkage(GlobalValue::InternalLinkage);
- BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[EntryBlock]);
- BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[ReturnBlock]);
- BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[NonReturnBlock]);
+ BasicBlock *NewReturnBlock =
+ cast<BasicBlock>(VMap[OutliningInfo->ReturnBlock]);
+ BasicBlock *NewNonReturnBlock =
+ cast<BasicBlock>(VMap[OutliningInfo->NonReturnBlock]);
+ DenseSet<BasicBlock *> NewEntries;
+ for (BasicBlock *BB : OutliningInfo->Entries) {
+ NewEntries.insert(cast<BasicBlock>(VMap[BB]));
+ }
// Go ahead and update all uses to the duplicate, so that we can just
// use the inliner functionality when we're done hacking.
F->replaceAllUsesWith(DuplicateFunction);
+ auto getFirstPHI = [](BasicBlock *BB) {
+ BasicBlock::iterator I = BB->begin();
+ PHINode *FirstPhi = nullptr;
+ while (I != BB->end()) {
+ PHINode *Phi = dyn_cast<PHINode>(I);
+ if (!Phi)
+ break;
+ if (!FirstPhi) {
+ FirstPhi = Phi;
+ break;
+ }
+ }
+ return FirstPhi;
+ };
// Special hackery is needed with PHI nodes that have inputs from more than
// one extracted block. For simplicity, just split the PHIs into a two-level
// sequence of PHIs, some of which will go in the extracted region, and some
// of which will go outside.
BasicBlock *PreReturn = NewReturnBlock;
- NewReturnBlock = NewReturnBlock->splitBasicBlock(
- NewReturnBlock->getFirstNonPHI()->getIterator());
- BasicBlock::iterator I = PreReturn->begin();
- Instruction *Ins = &NewReturnBlock->front();
- while (I != PreReturn->end()) {
- PHINode *OldPhi = dyn_cast<PHINode>(I);
- if (!OldPhi)
- break;
-
- PHINode *RetPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
- OldPhi->replaceAllUsesWith(RetPhi);
- Ins = NewReturnBlock->getFirstNonPHI();
-
- RetPhi->addIncoming(&*I, PreReturn);
- RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewEntryBlock),
- NewEntryBlock);
- OldPhi->removeIncomingValue(NewEntryBlock);
-
- ++I;
+ // only split block when necessary:
+ PHINode *FirstPhi = getFirstPHI(PreReturn);
+ unsigned NumPredsFromEntries = OutliningInfo->ReturnBlockPreds.size();
+ if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) {
+
+ NewReturnBlock = NewReturnBlock->splitBasicBlock(
+ NewReturnBlock->getFirstNonPHI()->getIterator());
+ BasicBlock::iterator I = PreReturn->begin();
+ Instruction *Ins = &NewReturnBlock->front();
+ while (I != PreReturn->end()) {
+ PHINode *OldPhi = dyn_cast<PHINode>(I);
+ if (!OldPhi)
+ break;
+
+ PHINode *RetPhi =
+ PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
+ OldPhi->replaceAllUsesWith(RetPhi);
+ Ins = NewReturnBlock->getFirstNonPHI();
+
+ RetPhi->addIncoming(&*I, PreReturn);
+ for (BasicBlock *E : OutliningInfo->ReturnBlockPreds) {
+ BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
+ RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE);
+ OldPhi->removeIncomingValue(NewE);
+ }
+ ++I;
+ }
+ for (auto E : OutliningInfo->ReturnBlockPreds) {
+ BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
+ NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);
+ }
}
- NewEntryBlock->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);
+ // Returns true if the block is to be partially inlined into the caller
+ // (i.e. not to be extracted to the out of line function)
+ auto ToBeInlined = [=](BasicBlock *BB) {
+ return BB == NewReturnBlock || NewEntries.count(BB);
+ };
// Gather up the blocks that we're going to extract.
std::vector<BasicBlock *> ToExtract;
ToExtract.push_back(NewNonReturnBlock);
for (BasicBlock &BB : *DuplicateFunction)
- if (&BB != NewEntryBlock && &BB != NewReturnBlock &&
- &BB != NewNonReturnBlock)
+ if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock)
ToExtract.push_back(&BB);
// The CodeExtractor needs a dominator tree.
@@ -183,16 +452,22 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
if (IsLimitReached())
continue;
- NumPartialInlining++;
OptimizationRemarkEmitter ORE(CS.getCaller());
+ if (!shouldPartialInline(CS, ORE))
+ continue;
+
DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
BasicBlock *Block = CS.getParent();
ORE.emit(OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", DLoc, Block)
<< ore::NV("Callee", F) << " partially inlined into "
<< ore::NV("Caller", CS.getCaller()));
+ InlineFunctionInfo IFI(nullptr, GetAssumptionCache);
InlineFunction(CS, IFI);
+ NumPartialInlining++;
+ // update stats
+ NumPartialInlined++;
}
// Ditch the duplicate, since we're done with it, and rewrite all remaining
@@ -200,7 +475,6 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
DuplicateFunction->replaceAllUsesWith(F);
DuplicateFunction->eraseFromParent();
- ++NumPartialInlined;
return ExtractedFunction;
}
@@ -246,6 +520,8 @@ char PartialInlinerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
"Partial Inliner", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
"Partial Inliner", false, false)
@@ -256,12 +532,25 @@ ModulePass *llvm::createPartialInliningPass() {
PreservedAnalyses PartialInlinerPass::run(Module &M,
ModuleAnalysisManager &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
std::function<AssumptionCache &(Function &)> GetAssumptionCache =
[&FAM](Function &F) -> AssumptionCache & {
return FAM.getResult<AssumptionAnalysis>(F);
};
- InlineFunctionInfo IFI(nullptr, &GetAssumptionCache);
- if (PartialInlinerImpl(IFI).run(M))
+
+ std::function<BlockFrequencyInfo &(Function &)> GetBFI =
+ [&FAM](Function &F) -> BlockFrequencyInfo & {
+ return FAM.getResult<BlockFrequencyAnalysis>(F);
+ };
+
+ std::function<TargetTransformInfo &(Function &)> GetTTI =
+ [&FAM](Function &F) -> TargetTransformInfo & {
+ return FAM.getResult<TargetIRAnalysis>(F);
+ };
+
+ ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
+
+ if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 0d5910ebbfcc9..203594572618a 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -38,6 +38,7 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Vectorize.h"
using namespace llvm;
@@ -137,14 +138,19 @@ static cl::opt<int> PreInlineThreshold(
"(default = 75)"));
static cl::opt<bool> EnableGVNHoist(
- "enable-gvn-hoist", cl::init(true), cl::Hidden,
- cl::desc("Enable the GVN hoisting pass (default = on)"));
+ "enable-gvn-hoist", cl::init(false), cl::Hidden,
+ cl::desc("Enable the GVN hoisting pass (default = off)"));
static cl::opt<bool>
DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false),
cl::Hidden,
cl::desc("Disable shrink-wrap library calls"));
+static cl::opt<bool>
+ EnableSimpleLoopUnswitch("enable-simple-loop-unswitch", cl::init(false),
+ cl::Hidden,
+ cl::desc("Enable the simple loop unswitch pass."));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -318,7 +324,10 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// Rotate Loop - disable header duplication at -Oz
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
MPM.add(createLICMPass()); // Hoist loop invariants
- MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
+ if (EnableSimpleLoopUnswitch)
+ MPM.add(createSimpleLoopUnswitchLegacyPass());
+ else
+ MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
MPM.add(createCFGSimplificationPass());
addInstructionCombiningPass(MPM);
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 9801a0a614165..d3a3c24ce7b44 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -30,42 +30,11 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
namespace {
-// Produce a unique identifier for this module by taking the MD5 sum of the
-// names of the module's strong external symbols. This identifier is
-// normally guaranteed to be unique, or the program would fail to link due to
-// multiply defined symbols.
-//
-// If the module has no strong external symbols (such a module may still have a
-// semantic effect if it performs global initialization), we cannot produce a
-// unique identifier for this module, so we return the empty string, which
-// causes the entire module to be written as a regular LTO module.
-std::string getModuleId(Module *M) {
- MD5 Md5;
- bool ExportsSymbols = false;
- for (auto &GV : M->global_values()) {
- if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
- !GV.hasExternalLinkage())
- continue;
- ExportsSymbols = true;
- Md5.update(GV.getName());
- Md5.update(ArrayRef<uint8_t>{0});
- }
-
- if (!ExportsSymbols)
- return "";
-
- MD5::MD5Result R;
- Md5.final(R);
-
- SmallString<32> Str;
- MD5::stringifyResult(R, Str);
- return ("$" + Str).str();
-}
-
// Promote each local-linkage entity defined by ExportM and used by ImportM by
// changing visibility and appending the given ModuleId.
void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
@@ -251,7 +220,7 @@ void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) {
void splitAndWriteThinLTOBitcode(
raw_ostream &OS, raw_ostream *ThinLinkOS,
function_ref<AAResults &(Function &)> AARGetter, Module &M) {
- std::string ModuleId = getModuleId(&M);
+ std::string ModuleId = getUniqueModuleId(&M);
if (ModuleId.empty()) {
// We couldn't generate a module ID for this module, just write it out as a
// regular LTO module.
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 030461004f568..4f1f194997683 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -861,7 +861,7 @@ static bool checkRippleForAdd(const APInt &Op0KnownZero,
// Find the most significant known 0 other than the sign bit.
int BitWidth = Op0KnownZero.getBitWidth();
APInt Op0KnownZeroTemp(Op0KnownZero);
- Op0KnownZeroTemp.clearBit(BitWidth - 1);
+ Op0KnownZeroTemp.clearSignBit();
int Op0ZeroPosition = BitWidth - Op0KnownZeroTemp.countLeadingZeros() - 1;
int Op1OnePosition = BitWidth - Op1MaybeOne.countLeadingZeros() - 1;
@@ -1037,7 +1037,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC))
+ I.hasNoUnsignedWrap(), SQ))
return replaceInstUsesWith(I, V);
// (A*B)+(A*C) -> A*(B+C) etc
@@ -1358,8 +1358,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V =
- SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL, &TLI, &DT, &AC))
+ if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), SQ))
return replaceInstUsesWith(I, V);
if (isa<Constant>(RHS))
@@ -1550,7 +1549,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC))
+ I.hasNoUnsignedWrap(), SQ))
return replaceInstUsesWith(I, V);
// (A*B)-(A*C) -> A*(B-C) etc
@@ -1756,8 +1755,7 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V =
- SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL, &TLI, &DT, &AC))
+ if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), SQ))
return replaceInstUsesWith(I, V);
// fsub nsz 0, X ==> fsub nsz -0.0, X
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index a97b5a9ec0bb8..c7092bf3a398b 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1213,7 +1213,7 @@ static Instruction *foldAndToXor(BinaryOperator &I,
// (~B | A) & (~A | B) --> ~(A ^ B)
// (~B | A) & (B | ~A) --> ~(A ^ B)
if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Value(B))))
+ match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B))))
return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
return nullptr;
@@ -1254,7 +1254,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyAndInst(Op0, Op1, DL, &TLI, &DT, &AC))
+ if (Value *V = SimplifyAndInst(Op0, Op1, SQ))
return replaceInstUsesWith(I, V);
// See if we can simplify any instructions used by the instruction whose sole
@@ -2039,7 +2039,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyOrInst(Op0, Op1, DL, &TLI, &DT, &AC))
+ if (Value *V = SimplifyOrInst(Op0, Op1, SQ))
return replaceInstUsesWith(I, V);
// See if we can simplify any instructions used by the instruction whose sole
@@ -2415,7 +2415,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyXorInst(Op0, Op1, DL, &TLI, &DT, &AC))
+ if (Value *V = SimplifyXorInst(Op0, Op1, SQ))
return replaceInstUsesWith(I, V);
if (Instruction *NewXor = foldXorToXor(I))
@@ -2433,25 +2433,32 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (Value *V = SimplifyBSwap(I))
return replaceInstUsesWith(I, V);
+ // Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand.
+ Value *X, *Y;
+
+ // We must eliminate the and/or (one-use) for these transforms to not increase
+ // the instruction count.
+ // ~(~X & Y) --> (X | ~Y)
+ // ~(Y & ~X) --> (X | ~Y)
+ if (match(&I, m_Not(m_OneUse(m_c_And(m_Not(m_Value(X)), m_Value(Y)))))) {
+ Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not");
+ return BinaryOperator::CreateOr(X, NotY);
+ }
+ // ~(~X | Y) --> (X & ~Y)
+ // ~(Y | ~X) --> (X & ~Y)
+ if (match(&I, m_Not(m_OneUse(m_c_Or(m_Not(m_Value(X)), m_Value(Y)))))) {
+ Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not");
+ return BinaryOperator::CreateAnd(X, NotY);
+ }
+
// Is this a 'not' (~) fed by a binary operator?
BinaryOperator *NotOp;
if (match(&I, m_Not(m_BinOp(NotOp)))) {
if (NotOp->getOpcode() == Instruction::And ||
NotOp->getOpcode() == Instruction::Or) {
- // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
- // ~(~X | Y) === (X & ~Y) - De Morgan's Law
- if (dyn_castNotVal(NotOp->getOperand(1)))
- NotOp->swapOperands();
- if (Value *Op0NotVal = dyn_castNotVal(NotOp->getOperand(0))) {
- Value *NotY = Builder->CreateNot(
- NotOp->getOperand(1), NotOp->getOperand(1)->getName() + ".not");
- if (NotOp->getOpcode() == Instruction::And)
- return BinaryOperator::CreateOr(Op0NotVal, NotY);
- return BinaryOperator::CreateAnd(Op0NotVal, NotY);
- }
-
- // ~(X & Y) --> (~X | ~Y) - De Morgan's Law
- // ~(X | Y) === (~X & ~Y) - De Morgan's Law
+ // Apply DeMorgan's Law when inverts are free:
+ // ~(X & Y) --> (~X | ~Y)
+ // ~(X | Y) --> (~X & ~Y)
if (IsFreeToInvert(NotOp->getOperand(0),
NotOp->getOperand(0)->hasOneUse()) &&
IsFreeToInvert(NotOp->getOperand(1),
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 313ab13b9e2b7..e9286b1bf1750 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -379,7 +379,7 @@ static Value *simplifyX86immShift(const IntrinsicInst &II,
for (unsigned i = 0; i != NumSubElts; ++i) {
unsigned SubEltIdx = (NumSubElts - 1) - i;
auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
- Count = Count.shl(BitWidth);
+ Count <<= BitWidth;
Count |= SubElt->getValue().zextOrTrunc(64);
}
}
@@ -1384,17 +1384,17 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
// Create a mask for bits above (ctlz) or below (cttz) the first known one.
bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
- unsigned NumMaskBits = IsTZ ? Known.One.countTrailingZeros()
- : Known.One.countLeadingZeros();
- APInt Mask = IsTZ ? APInt::getLowBitsSet(BitWidth, NumMaskBits)
- : APInt::getHighBitsSet(BitWidth, NumMaskBits);
+ unsigned PossibleZeros = IsTZ ? Known.One.countTrailingZeros()
+ : Known.One.countLeadingZeros();
+ unsigned DefiniteZeros = IsTZ ? Known.Zero.countTrailingOnes()
+ : Known.Zero.countLeadingOnes();
// If all bits above (ctlz) or below (cttz) the first known one are known
// zero, this value is constant.
// FIXME: This should be in InstSimplify because we're replacing an
// instruction with a constant.
- if (Mask.isSubsetOf(Known.Zero)) {
- auto *C = ConstantInt::get(IT, APInt(BitWidth, NumMaskBits));
+ if (PossibleZeros == DefiniteZeros) {
+ auto *C = ConstantInt::get(IT, DefiniteZeros);
return IC.replaceInstUsesWith(II, C);
}
@@ -1818,8 +1818,8 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
/// lifting.
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
auto Args = CI.arg_operands();
- if (Value *V = SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), DL,
- &TLI, &DT, &AC))
+ if (Value *V =
+ SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), SQ))
return replaceInstUsesWith(CI, V);
if (isFreeCall(&CI, &TLI))
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index d846a631b96f6..60970775de63b 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -190,8 +190,8 @@ static void computeSignedMinMaxValuesFromKnownBits(const KnownBits &Known,
Max = Known.One|UnknownBits;
if (UnknownBits.isNegative()) { // Sign bit is unknown
- Min.setBit(Min.getBitWidth()-1);
- Max.clearBit(Max.getBitWidth()-1);
+ Min.setSignBit();
+ Max.clearSignBit();
}
}
@@ -4269,8 +4269,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Changed = true;
}
- if (Value *V =
- SimplifyICmpInst(I.getPredicate(), Op0, Op1, DL, &TLI, &DT, &AC, &I))
+ if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1,
+ SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
// comparing -val or val with non-zero is the same as just comparing val
@@ -4778,8 +4778,9 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1,
- I.getFastMathFlags(), DL, &TLI, &DT, &AC, &I))
+ if (Value *V =
+ SimplifyFCmpInst(I.getPredicate(), Op0, Op1, I.getFastMathFlags(),
+ SQ.getWithInstruction(&I)))
return replaceInstUsesWith(I, V);
// Simplify 'fcmp pred X, X'
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 776686d3d1171..3be6419a129a4 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -17,9 +17,11 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
@@ -27,10 +29,9 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Dwarf.h"
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/IR/DIBuilder.h"
#define DEBUG_TYPE "instcombine"
@@ -193,7 +194,7 @@ private:
TargetLibraryInfo &TLI;
DominatorTree &DT;
const DataLayout &DL;
-
+ const SimplifyQuery SQ;
// Optional analyses. When non-null, these can both be used to do better
// combining and will be updated to reflect any changes.
LoopInfo *LI;
@@ -203,11 +204,11 @@ private:
public:
InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder,
bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA,
- AssumptionCache &AC, TargetLibraryInfo &TLI,
- DominatorTree &DT, const DataLayout &DL, LoopInfo *LI)
+ AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
+ const DataLayout &DL, LoopInfo *LI)
: Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT),
- DL(DL), LI(LI), MadeIRChange(false) {}
+ DL(DL), SQ(DL, &TLI, &DT, &AC), LI(LI), MadeIRChange(false) {}
/// \brief Run the combiner over the entire worklist until it is empty.
///
@@ -533,6 +534,12 @@ private:
/// value, or null if it didn't simplify.
Value *SimplifyUsingDistributiveLaws(BinaryOperator &I);
+ /// This tries to simplify binary operations by factorizing out common terms
+ /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
+ Value *tryFactorization(InstCombiner::BuilderTy *, BinaryOperator &,
+ Instruction::BinaryOps, Value *, Value *, Value *,
+ Value *);
+
/// \brief Attempts to replace V with a simpler value based on the demanded
/// bits.
Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownBits &Known,
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index ce66581a491a0..face9d9237ae1 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -179,7 +179,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyMulInst(Op0, Op1, DL, &TLI, &DT, &AC))
+ if (Value *V = SimplifyMulInst(Op0, Op1, SQ))
return replaceInstUsesWith(I, V);
if (Value *V = SimplifyUsingDistributiveLaws(I))
@@ -606,8 +606,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (isa<Constant>(Op0))
std::swap(Op0, Op1);
- if (Value *V =
- SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), DL, &TLI, &DT, &AC))
+ if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), SQ))
return replaceInstUsesWith(I, V);
bool AllowReassociate = I.hasUnsafeAlgebra();
@@ -1111,7 +1110,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyUDivInst(Op0, Op1, DL, &TLI, &DT, &AC))
+ if (Value *V = SimplifyUDivInst(Op0, Op1, SQ))
return replaceInstUsesWith(I, V);
// Handle the integer div common cases
@@ -1184,7 +1183,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifySDivInst(Op0, Op1, DL, &TLI, &DT, &AC))
+ if (Value *V = SimplifySDivInst(Op0, Op1, SQ))
return replaceInstUsesWith(I, V);
// Handle the integer div common cases
@@ -1296,8 +1295,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(),
- DL, &TLI, &DT, &AC))
+ if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(), SQ))
return replaceInstUsesWith(I, V);
if (isa<Constant>(Op0))
@@ -1481,7 +1479,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyURemInst(Op0, Op1, DL, &TLI, &DT, &AC))
+ if (Value *V = SimplifyURemInst(Op0, Op1, SQ))
return replaceInstUsesWith(I, V);
if (Instruction *common = commonIRemTransforms(I))
@@ -1524,7 +1522,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifySRemInst(Op0, Op1, DL, &TLI, &DT, &AC))
+ if (Value *V = SimplifySRemInst(Op0, Op1, SQ))
return replaceInstUsesWith(I, V);
// Handle the integer rem common cases
@@ -1597,8 +1595,7 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(),
- DL, &TLI, &DT, &AC))
+ if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(), SQ))
return replaceInstUsesWith(I, V);
// Handle cases involving: rem X, (select Cond, Y, Z)
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 85e5b6ba2dc2f..1117c11f4f51d 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -880,7 +880,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// PHINode simplification
//
Instruction *InstCombiner::visitPHINode(PHINode &PN) {
- if (Value *V = SimplifyInstruction(&PN, DL, &TLI, &DT, &AC))
+ if (Value *V = SimplifyInstruction(&PN, SQ))
return replaceInstUsesWith(PN, V);
if (Instruction *Result = FoldPHIArgZextsIntoPHI(PN))
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 76829c5e457bf..7afb8814fe52d 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1121,8 +1121,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *FalseVal = SI.getFalseValue();
Type *SelType = SI.getType();
- if (Value *V =
- SimplifySelectInst(CondVal, TrueVal, FalseVal, DL, &TLI, &DT, &AC))
+ if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, SQ))
return replaceInstUsesWith(SI, V);
if (Instruction *I = canonicalizeSelectToShuffle(SI))
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index f77d713b9b071..219effce7ba56 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -520,7 +520,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (Value *V = SimplifyShlInst(Op0, Op1, I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC))
+ I.hasNoUnsignedWrap(), SQ))
return replaceInstUsesWith(I, V);
if (Instruction *V = commonShiftTransforms(I))
@@ -618,7 +618,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Value *V = SimplifyLShrInst(Op0, Op1, I.isExact(), DL, &TLI, &DT, &AC))
+ if (Value *V = SimplifyLShrInst(Op0, Op1, I.isExact(), SQ))
return replaceInstUsesWith(I, V);
if (Instruction *R = commonShiftTransforms(I))
@@ -702,7 +702,7 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Value *V = SimplifyAShrInst(Op0, Op1, I.isExact(), DL, &TLI, &DT, &AC))
+ if (Value *V = SimplifyAShrInst(Op0, Op1, I.isExact(), SQ))
return replaceInstUsesWith(I, V);
if (Instruction *R = commonShiftTransforms(I))
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 8d0ed8532779e..0195c5e727c9a 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -589,12 +589,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If LHS is non-negative or has all low bits zero, then the upper bits
// are all zero.
- if (LHSKnown.Zero.isSignBitSet() || LowBits.isSubsetOf(LHSKnown.Zero))
+ if (LHSKnown.isNonNegative() || LowBits.isSubsetOf(LHSKnown.Zero))
Known.Zero |= ~LowBits;
// If LHS is negative and not all low bits are zero, then the upper bits
// are all one.
- if (LHSKnown.One.isSignBitSet() && LowBits.intersects(LHSKnown.One))
+ if (LHSKnown.isNegative() && LowBits.intersects(LHSKnown.One))
Known.One |= ~LowBits;
assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
@@ -607,8 +607,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (DemandedMask.isSignBitSet()) {
computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
// If it's known zero, our sign bit is also zero.
- if (LHSKnown.Zero.isSignBitSet())
- Known.Zero.setSignBit();
+ if (LHSKnown.isNonNegative())
+ Known.makeNonNegative();
}
break;
case Instruction::URem: {
@@ -1537,7 +1537,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
- LaneElts = LaneElts.shl(InnerVWidthPerLane * (2 * Lane + OpNum));
+ LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
UndefElts |= LaneElts;
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index e89b400a4afc8..7fc6774f1849c 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -144,8 +144,8 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
}
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
- if (Value *V = SimplifyExtractElementInst(
- EI.getVectorOperand(), EI.getIndexOperand(), DL, &TLI, &DT, &AC))
+ if (Value *V = SimplifyExtractElementInst(EI.getVectorOperand(),
+ EI.getIndexOperand(), SQ))
return replaceInstUsesWith(EI, V);
// If vector val is constant with all elements the same, replace EI with
@@ -1140,8 +1140,8 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
SmallVector<int, 16> Mask = SVI.getShuffleMask();
Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
- if (auto *V = SimplifyShuffleVectorInst(LHS, RHS, SVI.getMask(),
- SVI.getType(), DL, &TLI, &DT, &AC))
+ if (auto *V =
+ SimplifyShuffleVectorInst(LHS, RHS, SVI.getMask(), SVI.getType(), SQ))
return replaceInstUsesWith(SVI, V);
bool MadeChange = false;
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 4729c79ca4c3d..1eb98b18bfb50 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -256,7 +256,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
Value *C = I.getOperand(1);
// Does "B op C" simplify?
- if (Value *V = SimplifyBinOp(Opcode, B, C, DL)) {
+ if (Value *V = SimplifyBinOp(Opcode, B, C, SQ)) {
// It simplifies to V. Form "A op V".
I.setOperand(0, A);
I.setOperand(1, V);
@@ -285,7 +285,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
Value *C = Op1->getOperand(1);
// Does "A op B" simplify?
- if (Value *V = SimplifyBinOp(Opcode, A, B, DL)) {
+ if (Value *V = SimplifyBinOp(Opcode, A, B, SQ)) {
// It simplifies to V. Form "V op C".
I.setOperand(0, V);
I.setOperand(1, C);
@@ -313,7 +313,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
Value *C = I.getOperand(1);
// Does "C op A" simplify?
- if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) {
+ if (Value *V = SimplifyBinOp(Opcode, C, A, SQ)) {
// It simplifies to V. Form "V op B".
I.setOperand(0, V);
I.setOperand(1, B);
@@ -333,7 +333,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
Value *C = Op1->getOperand(1);
// Does "C op A" simplify?
- if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) {
+ if (Value *V = SimplifyBinOp(Opcode, C, A, SQ)) {
// It simplifies to V. Form "B op V".
I.setOperand(0, B);
I.setOperand(1, V);
@@ -498,10 +498,10 @@ getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode,
/// This tries to simplify binary operations by factorizing out common terms
/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
-static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
- const DataLayout &DL, BinaryOperator &I,
- Instruction::BinaryOps InnerOpcode, Value *A,
- Value *B, Value *C, Value *D) {
+Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder,
+ BinaryOperator &I,
+ Instruction::BinaryOps InnerOpcode,
+ Value *A, Value *B, Value *C, Value *D) {
assert(A && B && C && D && "All values must be provided");
Value *V = nullptr;
@@ -521,7 +521,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
std::swap(C, D);
// Consider forming "A op' (B op D)".
// If "B op D" simplifies then it can be formed with no cost.
- V = SimplifyBinOp(TopLevelOpcode, B, D, DL);
+ V = SimplifyBinOp(TopLevelOpcode, B, D, SQ);
// If "B op D" doesn't simplify then only go on if both of the existing
// operations "A op' B" and "C op' D" will be zapped as no longer used.
if (!V && LHS->hasOneUse() && RHS->hasOneUse())
@@ -540,7 +540,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
std::swap(C, D);
// Consider forming "(A op C) op' B".
// If "A op C" simplifies then it can be formed with no cost.
- V = SimplifyBinOp(TopLevelOpcode, A, C, DL);
+ V = SimplifyBinOp(TopLevelOpcode, A, C, SQ);
// If "A op C" doesn't simplify then only go on if both of the existing
// operations "A op' B" and "C op' D" will be zapped as no longer used.
@@ -610,23 +610,23 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
// The instruction has the form "(A op' B) op (C op' D)". Try to factorize
// a common term.
if (Op0 && Op1 && LHSOpcode == RHSOpcode)
- if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, C, D))
+ if (Value *V = tryFactorization(Builder, I, LHSOpcode, A, B, C, D))
return V;
// The instruction has the form "(A op' B) op (C)". Try to factorize common
// term.
if (Op0)
if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
- if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, RHS,
- Ident))
+ if (Value *V =
+ tryFactorization(Builder, I, LHSOpcode, A, B, RHS, Ident))
return V;
// The instruction has the form "(B) op (C op' D)". Try to factorize common
// term.
if (Op1)
if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
- if (Value *V = tryFactorization(Builder, DL, I, RHSOpcode, LHS, Ident,
- C, D))
+ if (Value *V =
+ tryFactorization(Builder, I, RHSOpcode, LHS, Ident, C, D))
return V;
}
@@ -638,8 +638,8 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
// Do "A op C" and "B op C" both simplify?
- if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, DL))
- if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, DL)) {
+ if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQ))
+ if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQ)) {
// They do! Return "L op' R".
++NumExpand;
C = Builder->CreateBinOp(InnerOpcode, L, R);
@@ -655,8 +655,8 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
// Do "A op B" and "A op C" both simplify?
- if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, DL))
- if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, DL)) {
+ if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQ))
+ if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQ)) {
// They do! Return "L op' R".
++NumExpand;
A = Builder->CreateBinOp(InnerOpcode, L, R);
@@ -672,14 +672,14 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
if (SI0->getCondition() == SI1->getCondition()) {
Value *SI = nullptr;
if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(),
- SI1->getFalseValue(), DL, &TLI, &DT, &AC))
+ SI1->getFalseValue(), SQ))
SI = Builder->CreateSelect(SI0->getCondition(),
Builder->CreateBinOp(TopLevelOpcode,
SI0->getTrueValue(),
SI1->getTrueValue()),
V);
if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(),
- SI1->getTrueValue(), DL, &TLI, &DT, &AC))
+ SI1->getTrueValue(), SQ))
SI = Builder->CreateSelect(
SI0->getCondition(), V,
Builder->CreateBinOp(TopLevelOpcode, SI0->getFalseValue(),
@@ -1399,8 +1399,7 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
- if (Value *V =
- SimplifyGEPInst(GEP.getSourceElementType(), Ops, DL, &TLI, &DT, &AC))
+ if (Value *V = SimplifyGEPInst(GEP.getSourceElementType(), Ops, SQ))
return replaceInstUsesWith(GEP, V);
Value *PtrOp = GEP.getOperand(0);
@@ -1589,7 +1588,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (SO1->getType() != GO1->getType())
return nullptr;
- Value* Sum = SimplifyAddInst(GO1, SO1, false, false, DL, &TLI, &DT, &AC);
+ Value *Sum = SimplifyAddInst(GO1, SO1, false, false, SQ);
// Only do the combine when we are sure the cost after the
// merge is never more than that before the merge.
if (Sum == nullptr)
@@ -1949,9 +1948,9 @@ static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo *TLI,
return isAllocLikeFn(V, TLI) && V != AI;
}
-static bool
-isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
- const TargetLibraryInfo *TLI) {
+static bool isAllocSiteRemovable(Instruction *AI,
+ SmallVectorImpl<WeakTrackingVH> &Users,
+ const TargetLibraryInfo *TLI) {
SmallVector<Instruction*, 4> Worklist;
Worklist.push_back(AI);
@@ -2035,7 +2034,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
// If we have a malloc call which is only used in any amount of comparisons
// to null and free calls, delete the calls and replace the comparisons with
// true or false as appropriate.
- SmallVector<WeakVH, 64> Users;
+ SmallVector<WeakTrackingVH, 64> Users;
if (isAllocSiteRemovable(&MI, Users, &TLI)) {
for (unsigned i = 0, e = Users.size(); i != e; ++i) {
// Lowering all @llvm.objectsize calls first because they may
@@ -2304,8 +2303,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
if (!EV.hasIndices())
return replaceInstUsesWith(EV, Agg);
- if (Value *V =
- SimplifyExtractValueInst(Agg, EV.getIndices(), DL, &TLI, &DT, &AC))
+ if (Value *V = SimplifyExtractValueInst(Agg, EV.getIndices(), SQ))
return replaceInstUsesWith(EV, V);
if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index b866958e3c4b8..b034ccc469338 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -101,6 +101,10 @@ static const char *const kAsanRegisterImageGlobalsName =
"__asan_register_image_globals";
static const char *const kAsanUnregisterImageGlobalsName =
"__asan_unregister_image_globals";
+static const char *const kAsanRegisterElfGlobalsName =
+ "__asan_register_elf_globals";
+static const char *const kAsanUnregisterElfGlobalsName =
+ "__asan_unregister_elf_globals";
static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
static const char *const kAsanInitName = "__asan_init";
@@ -120,8 +124,11 @@ static const char *const kAsanPoisonStackMemoryName =
"__asan_poison_stack_memory";
static const char *const kAsanUnpoisonStackMemoryName =
"__asan_unpoison_stack_memory";
+
+// ASan version script has __asan_* wildcard. Triple underscore prevents a
+// linker (gold) warning about attempting to export a local symbol.
static const char *const kAsanGlobalsRegisteredFlagName =
- "__asan_globals_registered";
+ "___asan_globals_registered";
static const char *const kAsanOptionDetectUseAfterReturn =
"__asan_option_detect_stack_use_after_return";
@@ -270,6 +277,13 @@ static cl::opt<bool>
"code stripping of globals"),
cl::Hidden, cl::init(true));
+// This is on by default even though there is a bug in gold:
+// https://sourceware.org/bugzilla/show_bug.cgi?id=19002
+static cl::opt<bool>
+ ClWithComdat("asan-with-comdat",
+ cl::desc("Place ASan constructors in comdat sections"),
+ cl::Hidden, cl::init(true));
+
// Debug flags.
static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,
cl::init(0));
@@ -607,10 +621,14 @@ public:
private:
void initializeCallbacks(Module &M);
- bool InstrumentGlobals(IRBuilder<> &IRB, Module &M);
+ bool InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat);
void InstrumentGlobalsCOFF(IRBuilder<> &IRB, Module &M,
ArrayRef<GlobalVariable *> ExtendedGlobals,
ArrayRef<Constant *> MetadataInitializers);
+ void InstrumentGlobalsELF(IRBuilder<> &IRB, Module &M,
+ ArrayRef<GlobalVariable *> ExtendedGlobals,
+ ArrayRef<Constant *> MetadataInitializers,
+ const std::string &UniqueModuleId);
void InstrumentGlobalsMachO(IRBuilder<> &IRB, Module &M,
ArrayRef<GlobalVariable *> ExtendedGlobals,
ArrayRef<Constant *> MetadataInitializers);
@@ -621,7 +639,8 @@ private:
GlobalVariable *CreateMetadataGlobal(Module &M, Constant *Initializer,
StringRef OriginalName);
- void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata);
+ void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata,
+ StringRef InternalSuffix);
IRBuilder<> CreateAsanModuleDtor(Module &M);
bool ShouldInstrumentGlobal(GlobalVariable *G);
@@ -647,6 +666,11 @@ private:
Function *AsanUnregisterGlobals;
Function *AsanRegisterImageGlobals;
Function *AsanUnregisterImageGlobals;
+ Function *AsanRegisterElfGlobals;
+ Function *AsanUnregisterElfGlobals;
+
+ Function *AsanCtorFunction = nullptr;
+ Function *AsanDtorFunction = nullptr;
};
// Stack poisoning does not play well with exception handling.
@@ -1431,8 +1455,13 @@ void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit,
void AddressSanitizerModule::createInitializerPoisonCalls(
Module &M, GlobalValue *ModuleName) {
GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+ if (!GV)
+ return;
+
+ ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (!CA)
+ return;
- ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
for (Use &OP : CA->operands()) {
if (isa<ConstantAggregateZero>(OP)) continue;
ConstantStruct *CS = cast<ConstantStruct>(OP);
@@ -1594,12 +1623,22 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) {
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy));
AsanUnregisterImageGlobals->setLinkage(Function::ExternalLinkage);
+
+ AsanRegisterElfGlobals = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction(kAsanRegisterElfGlobalsName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, IntptrTy));
+ AsanRegisterElfGlobals->setLinkage(Function::ExternalLinkage);
+
+ AsanUnregisterElfGlobals = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction(kAsanUnregisterElfGlobalsName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, IntptrTy));
+ AsanUnregisterElfGlobals->setLinkage(Function::ExternalLinkage);
}
// Put the metadata and the instrumented global in the same group. This ensures
// that the metadata is discarded if the instrumented global is discarded.
void AddressSanitizerModule::SetComdatForGlobalMetadata(
- GlobalVariable *G, GlobalVariable *Metadata) {
+ GlobalVariable *G, GlobalVariable *Metadata, StringRef InternalSuffix) {
Module &M = *G->getParent();
Comdat *C = G->getComdat();
if (!C) {
@@ -1609,7 +1648,15 @@ void AddressSanitizerModule::SetComdatForGlobalMetadata(
assert(G->hasLocalLinkage());
G->setName(Twine(kAsanGenPrefix) + "_anon_global");
}
- C = M.getOrInsertComdat(G->getName());
+
+ if (!InternalSuffix.empty() && G->hasLocalLinkage()) {
+ std::string Name = G->getName();
+ Name += InternalSuffix;
+ C = M.getOrInsertComdat(Name);
+ } else {
+ C = M.getOrInsertComdat(G->getName());
+ }
+
// Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF.
if (TargetTriple.isOSBinFormatCOFF())
C->setSelectionKind(Comdat::NoDuplicates);
@@ -1636,11 +1683,10 @@ AddressSanitizerModule::CreateMetadataGlobal(Module &M, Constant *Initializer,
}
IRBuilder<> AddressSanitizerModule::CreateAsanModuleDtor(Module &M) {
- Function *AsanDtorFunction =
+ AsanDtorFunction =
Function::Create(FunctionType::get(Type::getVoidTy(*C), false),
GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
- appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority);
return IRBuilder<>(ReturnInst::Create(*C, AsanDtorBB));
}
@@ -1665,8 +1711,67 @@ void AddressSanitizerModule::InstrumentGlobalsCOFF(
"global metadata will not be padded appropriately");
Metadata->setAlignment(SizeOfGlobalStruct);
- SetComdatForGlobalMetadata(G, Metadata);
+ SetComdatForGlobalMetadata(G, Metadata, "");
+ }
+}
+
+void AddressSanitizerModule::InstrumentGlobalsELF(
+ IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
+ ArrayRef<Constant *> MetadataInitializers,
+ const std::string &UniqueModuleId) {
+ assert(ExtendedGlobals.size() == MetadataInitializers.size());
+
+ SmallVector<GlobalValue *, 16> MetadataGlobals(ExtendedGlobals.size());
+ for (size_t i = 0; i < ExtendedGlobals.size(); i++) {
+ GlobalVariable *G = ExtendedGlobals[i];
+ GlobalVariable *Metadata =
+ CreateMetadataGlobal(M, MetadataInitializers[i], G->getName());
+ MDNode *MD = MDNode::get(M.getContext(), ValueAsMetadata::get(G));
+ Metadata->setMetadata(LLVMContext::MD_associated, MD);
+ MetadataGlobals[i] = Metadata;
+
+ SetComdatForGlobalMetadata(G, Metadata, UniqueModuleId);
}
+
+ // Update llvm.compiler.used, adding the new metadata globals. This is
+ // needed so that during LTO these variables stay alive.
+ if (!MetadataGlobals.empty())
+ appendToCompilerUsed(M, MetadataGlobals);
+
+ // RegisteredFlag serves two purposes. First, we can pass it to dladdr()
+ // to look up the loaded image that contains it. Second, we can store in it
+ // whether registration has already occurred, to prevent duplicate
+ // registration.
+ //
+ // Common linkage ensures that there is only one global per shared library.
+ GlobalVariable *RegisteredFlag = new GlobalVariable(
+ M, IntptrTy, false, GlobalVariable::CommonLinkage,
+ ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName);
+ RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility);
+
+ // Create start and stop symbols.
+ GlobalVariable *StartELFMetadata = new GlobalVariable(
+ M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr,
+ "__start_" + getGlobalMetadataSection());
+ StartELFMetadata->setVisibility(GlobalVariable::HiddenVisibility);
+ GlobalVariable *StopELFMetadata = new GlobalVariable(
+ M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr,
+ "__stop_" + getGlobalMetadataSection());
+ StopELFMetadata->setVisibility(GlobalVariable::HiddenVisibility);
+
+ // Create a call to register the globals with the runtime.
+ IRB.CreateCall(AsanRegisterElfGlobals,
+ {IRB.CreatePointerCast(RegisteredFlag, IntptrTy),
+ IRB.CreatePointerCast(StartELFMetadata, IntptrTy),
+ IRB.CreatePointerCast(StopELFMetadata, IntptrTy)});
+
+ // We also need to unregister globals at the end, e.g., when a shared library
+ // gets closed.
+ IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M);
+ IRB_Dtor.CreateCall(AsanUnregisterElfGlobals,
+ {IRB.CreatePointerCast(RegisteredFlag, IntptrTy),
+ IRB.CreatePointerCast(StartELFMetadata, IntptrTy),
+ IRB.CreatePointerCast(StopELFMetadata, IntptrTy)});
}
void AddressSanitizerModule::InstrumentGlobalsMachO(
@@ -1756,7 +1861,10 @@ void AddressSanitizerModule::InstrumentGlobalsWithMetadataArray(
// This function replaces all global variables with new variables that have
// trailing redzones. It also creates a function that poisons
// redzones and inserts this function into llvm.global_ctors.
-bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
+// Sets *CtorComdat to true if the global registration code emitted into the
+// asan constructor is comdat-compatible.
+bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat) {
+ *CtorComdat = false;
GlobalsMD.init(M);
SmallVector<GlobalVariable *, 16> GlobalsToChange;
@@ -1766,7 +1874,10 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
}
size_t n = GlobalsToChange.size();
- if (n == 0) return false;
+ if (n == 0) {
+ *CtorComdat = true;
+ return false;
+ }
auto &DL = M.getDataLayout();
@@ -1911,7 +2022,14 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
Initializers[i] = Initializer;
}
- if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) {
+ std::string ELFUniqueModuleId =
+ (UseGlobalsGC && TargetTriple.isOSBinFormatELF()) ? getUniqueModuleId(&M)
+ : "";
+
+ if (!ELFUniqueModuleId.empty()) {
+ InstrumentGlobalsELF(IRB, M, NewGlobals, Initializers, ELFUniqueModuleId);
+ *CtorComdat = true;
+ } else if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) {
InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers);
} else if (UseGlobalsGC && ShouldUseMachOGlobalsSection()) {
InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers);
@@ -1938,17 +2056,36 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
if (CompileKernel)
return false;
- Function *AsanCtorFunction;
+ // Create a module constructor. A destructor is created lazily because not all
+ // platforms, and not all modules need it.
std::tie(AsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions(
M, kAsanModuleCtorName, kAsanInitName, /*InitArgTypes=*/{},
/*InitArgs=*/{}, kAsanVersionCheckName);
- appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority);
+ bool CtorComdat = true;
bool Changed = false;
// TODO(glider): temporarily disabled globals instrumentation for KASan.
if (ClGlobals) {
IRBuilder<> IRB(AsanCtorFunction->getEntryBlock().getTerminator());
- Changed |= InstrumentGlobals(IRB, M);
+ Changed |= InstrumentGlobals(IRB, M, &CtorComdat);
+ }
+
+ // Put the constructor and destructor in comdat if both
+ // (1) global instrumentation is not TU-specific
+ // (2) target is ELF.
+ if (ClWithComdat && TargetTriple.isOSBinFormatELF() && CtorComdat) {
+ AsanCtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleCtorName));
+ appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority,
+ AsanCtorFunction);
+ if (AsanDtorFunction) {
+ AsanDtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleDtorName));
+ appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority,
+ AsanDtorFunction);
+ }
+ } else {
+ appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority);
+ if (AsanDtorFunction)
+ appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority);
}
return Changed;
@@ -2586,7 +2723,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
Value *NewAllocaPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)),
AI->getType());
- replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, /*Deref=*/false);
+ replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, DIExpression::NoDeref);
AI->replaceAllUsesWith(NewAllocaPtr);
}
diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index d7eb857cff7e1..493d014586c6a 100644
--- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -771,7 +771,7 @@ public:
if (perform(MI)) {
Changed = true;
++NumOfPGOMemOPOpt;
- DEBUG(dbgs() << "MemOP calls: " << MI->getCalledFunction()->getName()
+ DEBUG(dbgs() << "MemOP call: " << MI->getCalledFunction()->getName()
<< "is Transformed.\n");
}
}
@@ -863,13 +863,23 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
ActualCount = *BBEdgeCount;
}
+ ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals);
+ DEBUG(dbgs() << "Read one memory intrinsic profile with count " << ActualCount
+ << "\n");
+ DEBUG(
+ for (auto &VD
+ : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; });
+
if (ActualCount < MemOPCountThreshold)
return false;
+ // Skip if the total value profiled count is 0, in which case we can't
+ // scale up the counts properly (and there is no profitable transformation).
+ if (TotalCount == 0)
+ return false;
- ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals);
TotalCount = ActualCount;
if (MemOPScaleCount)
- DEBUG(dbgs() << "Scale counts: numberator = " << ActualCount
+ DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount
<< " denominator = " << SavedTotalCount << "\n");
// Keeping track of the count of the default case:
@@ -915,14 +925,10 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
MaxCount = RemainCount;
uint64_t SumForOpt = TotalCount - RemainCount;
- DEBUG(dbgs() << "Read one memory intrinsic profile: " << SumForOpt << " vs "
- << TotalCount << "\n");
- DEBUG(
- for (auto &VD
- : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; });
DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version
- << " Versions\n");
+ << " Versions (covering " << SumForOpt << " out of "
+ << TotalCount << ")\n");
// mem_op(..., size)
// ==>
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 190f05db4b0c1..3e480a6df446d 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2643,7 +2643,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
"ByVal argument is not a pointer!");
Size = DL.getTypeAllocSize(A->getType()->getPointerElementType());
if (ArgOffset + Size > kParamTLSSize) break;
- unsigned ParamAlignment = CS.getParamAlignment(i + 1);
+ unsigned ParamAlignment = CS.getParamAlignment(i);
unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment);
Store = IRB.CreateMemCpy(ArgShadowBase,
getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB),
@@ -3502,7 +3502,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
assert(A->getType()->isPointerTy());
Type *RealTy = A->getType()->getPointerElementType();
uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
- uint64_t ArgAlign = CS.getParamAlignment(ArgNo + 1);
+ uint64_t ArgAlign = CS.getParamAlignment(ArgNo);
if (ArgAlign < 8)
ArgAlign = 8;
VAArgOffset = alignTo(VAArgOffset, ArgAlign);
diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h
index f02b75f0b4560..cd9b3d96a14f7 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/lib/Transforms/ObjCARC/ObjCARC.h
@@ -69,6 +69,19 @@ static inline void EraseInstruction(Instruction *CI) {
RecursivelyDeleteTriviallyDeadInstructions(OldArg);
}
+/// If Inst is a ReturnRV and its operand is a call or invoke, return the
+/// operand. Otherwise return null.
+static inline const Instruction *getreturnRVOperand(const Instruction &Inst,
+ ARCInstKind Class) {
+ if (Class != ARCInstKind::RetainRV)
+ return nullptr;
+
+ const auto *Opnd = Inst.getOperand(0)->stripPointerCasts();
+ if (const auto *C = dyn_cast<CallInst>(Opnd))
+ return C;
+ return dyn_cast<InvokeInst>(Opnd);
+}
+
} // end namespace objcarc
} // end namespace llvm
diff --git a/lib/Transforms/ObjCARC/PtrState.cpp b/lib/Transforms/ObjCARC/PtrState.cpp
index c1bbc4e96b16d..d13e941044f14 100644
--- a/lib/Transforms/ObjCARC/PtrState.cpp
+++ b/lib/Transforms/ObjCARC/PtrState.cpp
@@ -244,6 +244,18 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst,
const Value *Ptr,
ProvenanceAnalysis &PA,
ARCInstKind Class) {
+ auto SetSeqAndInsertReverseInsertPt = [&](Sequence NewSeq){
+ assert(!HasReverseInsertPts());
+ SetSeq(NewSeq);
+ // If this is an invoke instruction, we're scanning it as part of
+ // one of its successor blocks, since we can't insert code after it
+ // in its own block, and we don't want to split critical edges.
+ if (isa<InvokeInst>(Inst))
+ InsertReverseInsertPt(&*BB->getFirstInsertionPt());
+ else
+ InsertReverseInsertPt(&*++Inst->getIterator());
+ };
+
// Check for possible direct uses.
switch (GetSeq()) {
case S_Release:
@@ -251,26 +263,18 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst,
if (CanUse(Inst, Ptr, PA, Class)) {
DEBUG(dbgs() << " CanUse: Seq: " << GetSeq() << "; " << *Ptr
<< "\n");
- assert(!HasReverseInsertPts());
- // If this is an invoke instruction, we're scanning it as part of
- // one of its successor blocks, since we can't insert code after it
- // in its own block, and we don't want to split critical edges.
- if (isa<InvokeInst>(Inst))
- InsertReverseInsertPt(&*BB->getFirstInsertionPt());
- else
- InsertReverseInsertPt(&*++Inst->getIterator());
- SetSeq(S_Use);
+ SetSeqAndInsertReverseInsertPt(S_Use);
} else if (Seq == S_Release && IsUser(Class)) {
DEBUG(dbgs() << " PreciseReleaseUse: Seq: " << GetSeq() << "; "
<< *Ptr << "\n");
// Non-movable releases depend on any possible objc pointer use.
- SetSeq(S_Stop);
- assert(!HasReverseInsertPts());
- // As above; handle invoke specially.
- if (isa<InvokeInst>(Inst))
- InsertReverseInsertPt(&*BB->getFirstInsertionPt());
- else
- InsertReverseInsertPt(&*++Inst->getIterator());
+ SetSeqAndInsertReverseInsertPt(S_Stop);
+ } else if (const auto *Call = getreturnRVOperand(*Inst, Class)) {
+ if (CanUse(Call, Ptr, PA, GetBasicARCInstKind(Call))) {
+ DEBUG(dbgs() << " ReleaseUse: Seq: " << GetSeq() << "; "
+ << *Ptr << "\n");
+ SetSeqAndInsertReverseInsertPt(S_Stop);
+ }
}
break;
case S_Stop:
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index b323ab3bd443c..523390758769a 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -55,6 +55,7 @@ add_llvm_library(LLVMScalarOpts
Scalar.cpp
Scalarizer.cpp
SeparateConstOffsetFromGEP.cpp
+ SimpleLoopUnswitch.cpp
SimplifyCFGPass.cpp
Sink.cpp
SpeculativeExecution.cpp
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index b5a4cc2f39533..dc864f48bf1f8 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -151,7 +151,7 @@ static bool processPHI(PHINode *P, LazyValueInfo *LVI,
Changed = true;
}
- if (Value *V = SimplifyInstruction(P, SQ.getWithInstruction(P))) {
+ if (Value *V = SimplifyInstruction(P, SQ)) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
Changed = true;
@@ -565,25 +565,14 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
return false;
LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr;
- auto *ACWP = getAnalysisIfAvailable<AssumptionCacheTracker>();
- auto *AC = ACWP ? &ACWP->getAssumptionCache(F) : nullptr;
- const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC);
- return runImpl(F, LVI, SQ);
+ return runImpl(F, LVI, getBestSimplifyQuery(*this, F));
}
PreservedAnalyses
CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) {
LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F);
- auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
- auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F);
- auto *AC = AM.getCachedResult<AssumptionAnalysis>(F);
- const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC);
- bool Changed = runImpl(F, LVI, SQ);
+ bool Changed = runImpl(F, LVI, getBestSimplifyQuery(AM, F));
if (!Changed)
return PreservedAnalyses::all();
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 04479b6e49ac8..d8f8a58a5fdfa 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -253,6 +253,7 @@ public:
const TargetTransformInfo &TTI;
DominatorTree &DT;
AssumptionCache &AC;
+ const SimplifyQuery SQ;
MemorySSA *MSSA;
std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
typedef RecyclingAllocator<
@@ -315,9 +316,10 @@ public:
unsigned CurrentGeneration;
/// \brief Set up the EarlyCSE runner for a particular function.
- EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI,
- DominatorTree &DT, AssumptionCache &AC, MemorySSA *MSSA)
- : TLI(TLI), TTI(TTI), DT(DT), AC(AC), MSSA(MSSA),
+ EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
+ const TargetTransformInfo &TTI, DominatorTree &DT,
+ AssumptionCache &AC, MemorySSA *MSSA)
+ : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
MSSAUpdater(make_unique<MemorySSAUpdater>(MSSA)), CurrentGeneration(0) {
}
@@ -616,8 +618,6 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
/// stores which can occur in bitfield code among other things.
Instruction *LastStore = nullptr;
- const DataLayout &DL = BB->getModule()->getDataLayout();
-
// See if any instructions in the block can be eliminated. If so, do it. If
// not, add them to AvailableValues.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
@@ -635,10 +635,16 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// Skip assume intrinsics, they don't really have side effects (although
// they're marked as such to ensure preservation of control dependencies),
- // and this pass will not disturb any of the assumption's control
- // dependencies.
+ // and this pass will not bother with its removal. However, we should mark
+ // its condition as true for all dominated blocks.
if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
- DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
+ auto *CondI =
+ dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0));
+ if (CondI && SimpleValue::canHandle(CondI)) {
+ DEBUG(dbgs() << "EarlyCSE considering assumption: " << *Inst << '\n');
+ AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
+ } else
+ DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
continue;
}
@@ -658,10 +664,25 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (match(Inst, m_Intrinsic<Intrinsic::experimental_guard>())) {
if (auto *CondI =
dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) {
- // The condition we're on guarding here is true for all dominated
- // locations.
- if (SimpleValue::canHandle(CondI))
+ if (SimpleValue::canHandle(CondI)) {
+ // Do we already know the actual value of this condition?
+ if (auto *KnownCond = AvailableValues.lookup(CondI)) {
+ // Is the condition known to be true?
+ if (isa<ConstantInt>(KnownCond) &&
+ cast<ConstantInt>(KnownCond)->isOneValue()) {
+ DEBUG(dbgs() << "EarlyCSE removing guard: " << *Inst << '\n');
+ removeMSSA(Inst);
+ Inst->eraseFromParent();
+ Changed = true;
+ continue;
+ } else
+ // Use the known value if it wasn't true.
+ cast<CallInst>(Inst)->setArgOperand(0, KnownCond);
+ }
+ // The condition we're on guarding here is true for all dominated
+ // locations.
AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
+ }
}
// Guard intrinsics read all memory, but don't write any memory.
@@ -673,7 +694,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If the instruction can be simplified (e.g. X+0 = X) then replace it with
// its simpler value.
- if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) {
+ if (Value *V = SimplifyInstruction(Inst, SQ)) {
DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
bool Killed = false;
if (!Inst->use_empty()) {
@@ -964,7 +985,7 @@ PreservedAnalyses EarlyCSEPass::run(Function &F,
auto *MSSA =
UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
- EarlyCSE CSE(TLI, TTI, DT, AC, MSSA);
+ EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
if (!CSE.run())
return PreservedAnalyses::all();
@@ -1008,7 +1029,7 @@ public:
auto *MSSA =
UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
- EarlyCSE CSE(TLI, TTI, DT, AC, MSSA);
+ EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
return CSE.run();
}
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index be696df548d52..c04646eed49a8 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1687,7 +1687,7 @@ bool GVN::processInstruction(Instruction *I) {
// example if it determines that %y is equal to %x then the instruction
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
const DataLayout &DL = I->getModule()->getDataLayout();
- if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) {
+ if (Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC})) {
bool Changed = false;
if (!I->use_empty()) {
I->replaceAllUsesWith(V);
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index dcb2a4a0c6e6b..3953198fe6052 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -97,7 +97,7 @@ class IndVarSimplify {
TargetLibraryInfo *TLI;
const TargetTransformInfo *TTI;
- SmallVector<WeakVH, 16> DeadInsts;
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
bool Changed = false;
bool isValidRewrite(Value *FromVal, Value *ToVal);
@@ -415,8 +415,8 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
Compare->getName());
// In the following deletions, PN may become dead and may be deleted.
- // Use a WeakVH to observe whether this happens.
- WeakVH WeakPH = PN;
+ // Use a WeakTrackingVH to observe whether this happens.
+ WeakTrackingVH WeakPH = PN;
// Delete the old floating point exit comparison. The branch starts using the
// new comparison.
@@ -451,7 +451,7 @@ void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
//
BasicBlock *Header = L->getHeader();
- SmallVector<WeakVH, 8> PHIs;
+ SmallVector<WeakTrackingVH, 8> PHIs;
for (BasicBlock::iterator I = Header->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I)
PHIs.push_back(PN);
@@ -901,7 +901,7 @@ class WidenIV {
PHINode *WidePhi;
Instruction *WideInc;
const SCEV *WideIncExpr;
- SmallVectorImpl<WeakVH> &DeadInsts;
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts;
SmallPtrSet<Instruction *,16> Widened;
SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
@@ -941,20 +941,13 @@ class WidenIV {
}
public:
- WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
- ScalarEvolution *SEv, DominatorTree *DTree,
- SmallVectorImpl<WeakVH> &DI, bool HasGuards) :
- OrigPhi(WI.NarrowIV),
- WideType(WI.WidestNativeType),
- LI(LInfo),
- L(LI->getLoopFor(OrigPhi->getParent())),
- SE(SEv),
- DT(DTree),
- HasGuards(HasGuards),
- WidePhi(nullptr),
- WideInc(nullptr),
- WideIncExpr(nullptr),
- DeadInsts(DI) {
+ WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
+ DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
+ bool HasGuards)
+ : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
+ L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
+ HasGuards(HasGuards), WidePhi(nullptr), WideInc(nullptr),
+ WideIncExpr(nullptr), DeadInsts(DI) {
assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
}
diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 9e2563879da2b..5e116ef2fe75e 100644
--- a/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -138,7 +138,7 @@ private:
// Tries to infer the specific address space of each address expression in
// Postorder.
- void inferAddressSpaces(const std::vector<Value *> &Postorder,
+ void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
ValueToAddrSpaceMapTy *InferredAddrSpace) const;
bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;
@@ -147,7 +147,7 @@ private:
// address spaces if InferredAddrSpace says so. Postorder is the postorder of
// all flat expressions in the use-def graph of function F.
bool
- rewriteWithNewAddressSpaces(const std::vector<Value *> &Postorder,
+ rewriteWithNewAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
const ValueToAddrSpaceMapTy &InferredAddrSpace,
Function *F) const;
@@ -162,7 +162,7 @@ private:
std::vector<std::pair<Value *, bool>> &PostorderStack,
DenseSet<Value *> &Visited) const;
- std::vector<Value *> collectFlatAddressExpressions(Function &F) const;
+ std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const;
Value *cloneValueWithNewAddressSpace(
Value *V, unsigned NewAddrSpace,
@@ -274,16 +274,36 @@ void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
DenseSet<Value *> &Visited) const {
assert(V->getType()->isPointerTy());
+
+ // Generic addressing expressions may be hidden in nested constant
+ // expressions.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ // TODO: Look in non-address parts, like icmp operands.
+ if (isAddressExpression(*CE) && Visited.insert(CE).second)
+ PostorderStack.push_back(std::make_pair(CE, false));
+
+ return;
+ }
+
if (isAddressExpression(*V) &&
V->getType()->getPointerAddressSpace() == FlatAddrSpace) {
- if (Visited.insert(V).second)
+ if (Visited.insert(V).second) {
PostorderStack.push_back(std::make_pair(V, false));
+
+ Operator *Op = cast<Operator>(V);
+ for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
+ if (isAddressExpression(*CE) && Visited.insert(CE).second)
+ PostorderStack.emplace_back(CE, false);
+ }
+ }
+ }
}
}
// Returns all flat address expressions in function F. The elements are ordered
// ordered in postorder.
-std::vector<Value *>
+std::vector<WeakTrackingVH>
InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
// This function implements a non-recursive postorder traversal of a partial
// use-def graph of function F.
@@ -326,21 +346,25 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
PushPtrOperand(Cmp->getOperand(0));
PushPtrOperand(Cmp->getOperand(1));
}
+ } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
+ if (!ASC->getType()->isVectorTy())
+ PushPtrOperand(ASC->getPointerOperand());
}
}
- std::vector<Value *> Postorder; // The resultant postorder.
+ std::vector<WeakTrackingVH> Postorder; // The resultant postorder.
while (!PostorderStack.empty()) {
+ Value *TopVal = PostorderStack.back().first;
// If the operands of the expression on the top are already explored,
// adds that expression to the resultant postorder.
if (PostorderStack.back().second) {
- Postorder.push_back(PostorderStack.back().first);
+ Postorder.push_back(TopVal);
PostorderStack.pop_back();
continue;
}
// Otherwise, adds its operands to the stack and explores them.
PostorderStack.back().second = true;
- for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first)) {
+ for (Value *PtrOperand : getPointerOperands(*TopVal)) {
appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
Visited);
}
@@ -559,7 +583,7 @@ bool InferAddressSpaces::runOnFunction(Function &F) {
return false;
// Collects all flat address expressions in postorder.
- std::vector<Value *> Postorder = collectFlatAddressExpressions(F);
+ std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F);
// Runs a data-flow analysis to refine the address spaces of every expression
// in Postorder.
@@ -571,8 +595,10 @@ bool InferAddressSpaces::runOnFunction(Function &F) {
return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F);
}
+// Constants need to be tracked through RAUW to handle cases with nested
+// constant expressions, so wrap values in WeakTrackingVH.
void InferAddressSpaces::inferAddressSpaces(
- const std::vector<Value *> &Postorder,
+ ArrayRef<WeakTrackingVH> Postorder,
ValueToAddrSpaceMapTy *InferredAddrSpace) const {
SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
// Initially, all expressions are in the uninitialized address space.
@@ -784,8 +810,8 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I,
}
bool InferAddressSpaces::rewriteWithNewAddressSpaces(
- const std::vector<Value *> &Postorder,
- const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
+ ArrayRef<WeakTrackingVH> Postorder,
+ const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
// For each address expression to be modified, creates a clone of it with its
// pointer operands converted to the new address space. Since the pointer
// operands are converted, the clone is naturally in the new address space by
@@ -812,8 +838,12 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(UndefUse->get()));
}
+ SmallVector<Instruction *, 16> DeadInstructions;
+
// Replaces the uses of the old address expressions with the new ones.
- for (Value *V : Postorder) {
+ for (const WeakTrackingVH &WVH : Postorder) {
+ assert(WVH && "value was unexpectedly deleted");
+ Value *V = WVH;
Value *NewV = ValueWithNewAddrSpace.lookup(V);
if (NewV == nullptr)
continue;
@@ -821,6 +851,17 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
DEBUG(dbgs() << "Replacing the uses of " << *V
<< "\n with\n " << *NewV << '\n');
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ Constant *Replace = ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
+ C->getType());
+ if (C != Replace) {
+ DEBUG(dbgs() << "Inserting replacement const cast: "
+ << Replace << ": " << *Replace << '\n');
+ C->replaceAllUsesWith(Replace);
+ V = Replace;
+ }
+ }
+
Value::use_iterator I, E, Next;
for (I = V->use_begin(), E = V->use_end(); I != E; ) {
Use &U = *I;
@@ -881,6 +922,15 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
}
}
+ if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) {
+ unsigned NewAS = NewV->getType()->getPointerAddressSpace();
+ if (ASC->getDestAddressSpace() == NewAS) {
+ ASC->replaceAllUsesWith(NewV);
+ DeadInstructions.push_back(ASC);
+ continue;
+ }
+ }
+
// Otherwise, replaces the use with flat(NewV).
if (Instruction *I = dyn_cast<Instruction>(V)) {
BasicBlock::iterator InsertPos = std::next(I->getIterator());
@@ -894,10 +944,15 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
}
}
- if (V->use_empty())
- RecursivelyDeleteTriviallyDeadInstructions(V);
+ if (V->use_empty()) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ DeadInstructions.push_back(I);
+ }
}
+ for (Instruction *I : DeadInstructions)
+ RecursivelyDeleteTriviallyDeadInstructions(I);
+
return true;
}
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index a0da81605a809..7dacaba1193e8 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -557,7 +557,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
Value *LHS = PN->getIncomingValue(i);
Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
- Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, DL);
+ Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, {DL});
if (!Res) {
if (!isa<Constant>(RHS))
continue;
@@ -1250,37 +1250,53 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
BasicBlock *OnlyDest = nullptr;
BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
+ Constant *OnlyVal = nullptr;
+ Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;
+ unsigned PredWithKnownDest = 0;
for (const auto &PredValue : PredValues) {
BasicBlock *Pred = PredValue.second;
if (!SeenPreds.insert(Pred).second)
continue; // Duplicate predecessor entry.
- // If the predecessor ends with an indirect goto, we can't change its
- // destination.
- if (isa<IndirectBrInst>(Pred->getTerminator()))
- continue;
-
Constant *Val = PredValue.first;
BasicBlock *DestBB;
if (isa<UndefValue>(Val))
DestBB = nullptr;
- else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
+ else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
- else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
} else {
assert(isa<IndirectBrInst>(BB->getTerminator())
&& "Unexpected terminator");
+ assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
DestBB = cast<BlockAddress>(Val)->getBasicBlock();
}
// If we have exactly one destination, remember it for efficiency below.
- if (PredToDestList.empty())
+ if (PredToDestList.empty()) {
OnlyDest = DestBB;
- else if (OnlyDest != DestBB)
- OnlyDest = MultipleDestSentinel;
+ OnlyVal = Val;
+ } else {
+ if (OnlyDest != DestBB)
+ OnlyDest = MultipleDestSentinel;
+      // It is possible we have the same destination, but a different value,
+      // e.g. the default case in a SwitchInst.
+ if (Val != OnlyVal)
+ OnlyVal = MultipleVal;
+ }
+
+ // We know where this predecessor is going.
+ ++PredWithKnownDest;
+
+ // If the predecessor ends with an indirect goto, we can't change its
+ // destination.
+ if (isa<IndirectBrInst>(Pred->getTerminator()))
+ continue;
PredToDestList.push_back(std::make_pair(Pred, DestBB));
}
@@ -1293,7 +1309,7 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
// not thread. By doing so, we do not need to duplicate the current block and
// also miss potential opportunities in case we dont/cant duplicate.
if (OnlyDest && OnlyDest != MultipleDestSentinel) {
- if (PredToDestList.size() ==
+ if (PredWithKnownDest ==
(size_t)std::distance(pred_begin(BB), pred_end(BB))) {
bool SeenFirstBranchToOnlyDest = false;
for (BasicBlock *SuccBB : successors(BB)) {
@@ -1310,11 +1326,18 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
// If the condition is now dead due to the removal of the old terminator,
// erase it.
- auto *CondInst = dyn_cast<Instruction>(Cond);
- if (CondInst && CondInst->use_empty())
- CondInst->eraseFromParent();
- // FIXME: in case this instruction is defined in the current BB and it
- // resolves to a single value from all predecessors, we can do RAUW.
+ if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
+ if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
+ CondInst->eraseFromParent();
+ else if (OnlyVal && OnlyVal != MultipleVal &&
+ CondInst->getParent() == BB) {
+ // If we just learned Cond is the same value for all uses of the
+ // condition, replace it with a constant value
+ CondInst->replaceAllUsesWith(OnlyVal);
+ if (!CondInst->mayHaveSideEffects())
+ CondInst->eraseFromParent();
+ }
+ }
return true;
}
}
@@ -1883,8 +1906,9 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
// If this instruction can be simplified after the operands are updated,
// just use the simplified value instead. This frequently happens due to
// phi translation.
- if (Value *IV =
- SimplifyInstruction(New, BB->getModule()->getDataLayout())) {
+ if (Value *IV = SimplifyInstruction(
+ New,
+ {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
ValueMapping[&*BI] = IV;
if (!New->mayHaveSideEffects()) {
delete New;
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 5042fc18d7c49..410fbb03068f4 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -499,7 +499,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(
Instruction *Inst = &*I++;
// Look for memset instructions, which may be optimized to a larger memset.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
- WeakVH InstPtr(&*I);
+ WeakTrackingVH InstPtr(&*I);
if (!processLoopMemSet(MSI, BECount))
continue;
MadeChange = true;
@@ -856,7 +856,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
/// If the stored value is a strided load in the same loop with the same stride
/// this may be transformable into a memcpy. This kicks in for stuff like
-/// for (i) A[i] = B[i];
+/// for (i) A[i] = B[i];
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
const SCEV *BECount) {
assert(SI->isSimple() && "Expected only non-volatile stores.");
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 28e71ca05436c..af095560cc025 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -77,7 +77,7 @@ static bool SimplifyLoopInst(Loop *L, DominatorTree *DT, LoopInfo *LI,
// Don't bother simplifying unused instructions.
if (!I->use_empty()) {
- Value *V = SimplifyInstruction(I, DL, TLI, DT, AC);
+ Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC});
if (V && LI->replacementPreservesLCSSAForm(I, V)) {
// Mark all uses for resimplification next time round the loop.
for (User *U : I->users())
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 8ce96cf1b7a67..2ba9265566a8c 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -341,7 +341,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// With the operands remapped, see if the instruction constant folds or is
// otherwise simplifyable. This commonly occurs because the entry from PHI
// nodes allows icmps and other instructions to fold.
- Value *V = SimplifyInstruction(C, SQ.getWithInstruction(C));
+ Value *V = SimplifyInstruction(C, SQ);
if (V && LI->replacementPreservesLCSSAForm(C, V)) {
// If so, then delete the temporary instruction and stick the folded value
// in the map.
@@ -670,8 +670,9 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
LPMUpdater &) {
int Threshold = EnableHeaderDuplication ? DefaultRotationThreshold : 0;
const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
- const SimplifyQuery SQ(DL, &AR.TLI, &AR.DT, &AR.AC);
- LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, SQ);
+ const SimplifyQuery SQ = getBestSimplifyQuery(AR, DL);
+ LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE,
+ SQ);
bool Changed = LR.processLoop(&L);
if (!Changed)
@@ -714,10 +715,7 @@ public:
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
auto *SE = SEWP ? &SEWP->getSE() : nullptr;
- auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr;
- const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- const SimplifyQuery SQ(DL, TLI, DT, AC);
+ const SimplifyQuery SQ = getBestSimplifyQuery(*this, F);
LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE, SQ);
return LR.processLoop(L);
}
diff --git a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index a5a81c33a8ebd..35c05e84fd68d 100644
--- a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -40,7 +40,7 @@ static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI) {
bool Changed = false;
// Copy blocks into a temporary array to avoid iterator invalidation issues
// as we remove them.
- SmallVector<WeakVH, 16> Blocks(L.blocks());
+ SmallVector<WeakTrackingVH, 16> Blocks(L.blocks());
for (auto &Block : Blocks) {
// Attempt to merge blocks in the trivial case. Don't modify blocks which
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index af137f6faa634..ccedb98d7fa15 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -900,7 +900,7 @@ static bool isHighCostExpansion(const SCEV *S,
/// If any of the instructions is the specified set are trivially dead, delete
/// them and see if this makes any of their operands subsequently dead.
static bool
-DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
+DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
bool Changed = false;
while (!DeadInsts.empty()) {
@@ -1845,7 +1845,7 @@ class LSRInstance {
void FinalizeChain(IVChain &Chain);
void CollectChains();
void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts);
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts);
void CollectInterestingTypesAndFactors();
void CollectFixupsAndInitialFormulae();
@@ -1920,19 +1920,15 @@ class LSRInstance {
const LSRUse &LU,
SCEVExpander &Rewriter) const;
- Value *Expand(const LSRUse &LU, const LSRFixup &LF,
- const Formula &F,
- BasicBlock::iterator IP,
- SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts) const;
+ Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
+ BasicBlock::iterator IP, SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
- const Formula &F,
- SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts) const;
- void Rewrite(const LSRUse &LU, const LSRFixup &LF,
- const Formula &F,
+ const Formula &F, SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
+ void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts) const;
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
public:
@@ -3014,7 +3010,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
/// Generate an add or subtract for each IVInc in a chain to materialize the IV
/// user's operand from the previous IV user's operand.
void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts) {
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
// Find the new IVOperand for the head of the chain. It may have been replaced
// by LSR.
const IVInc &Head = Chain.Incs[0];
@@ -4759,12 +4755,10 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
/// Emit instructions for the leading candidate expression for this LSRUse (this
/// is called "expanding").
-Value *LSRInstance::Expand(const LSRUse &LU,
- const LSRFixup &LF,
- const Formula &F,
- BasicBlock::iterator IP,
+Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
+ const Formula &F, BasicBlock::iterator IP,
SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts) const {
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
if (LU.RigidFormula)
return LF.OperandValToReplace;
@@ -4939,12 +4933,9 @@ Value *LSRInstance::Expand(const LSRUse &LU,
/// Helper for Rewrite. PHI nodes are special because the use of their operands
/// effectively happens in their predecessor blocks, so the expression may need
/// to be expanded in multiple places.
-void LSRInstance::RewriteForPHI(PHINode *PN,
- const LSRUse &LU,
- const LSRFixup &LF,
- const Formula &F,
- SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts) const {
+void LSRInstance::RewriteForPHI(
+ PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F,
+ SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
DenseMap<BasicBlock *, Value *> Inserted;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
@@ -5016,11 +5007,9 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
/// Emit instructions for the leading candidate expression for this LSRUse (this
/// is called "expanding"), and update the UserInst to reference the newly
/// expanded value.
-void LSRInstance::Rewrite(const LSRUse &LU,
- const LSRFixup &LF,
- const Formula &F,
- SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts) const {
+void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
+ const Formula &F, SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
// First, find an insertion point that dominates UserInst. For PHI nodes,
// find the nearest block which dominates all the relevant uses.
if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
@@ -5058,7 +5047,7 @@ void LSRInstance::ImplementSolution(
const SmallVectorImpl<const Formula *> &Solution) {
// Keep track of instructions we may have made dead, so that
// we can remove them after we are done working.
- SmallVector<WeakVH, 16> DeadInsts;
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(),
"lsr");
@@ -5308,7 +5297,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
// Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader());
if (EnablePhiElim && L->isLoopSimplifyForm()) {
- SmallVector<WeakVH, 16> DeadInsts;
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
SCEVExpander Rewriter(SE, DL, "lsr");
#ifndef NDEBUG
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 8fa806a7e8bcc..6ef1464e9338e 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -1231,11 +1231,12 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
LoopProcessWorklist.push_back(NewLoop);
redoLoop = true;
- // Keep a WeakVH holding onto LIC. If the first call to RewriteLoopBody
+ // Keep a WeakTrackingVH holding onto LIC. If the first call to
+ // RewriteLoopBody
// deletes the instruction (for example by simplifying a PHI that feeds into
// the condition that we're unswitching on), we don't rewrite the second
// iteration.
- WeakVH LICHandle(LIC);
+ WeakTrackingVH LICHandle(LIC);
// Now we rewrite the original code to know that the condition is true and the
// new code to know that the condition is false.
@@ -1262,7 +1263,7 @@ static void RemoveFromWorklist(Instruction *I,
static void ReplaceUsesOfWith(Instruction *I, Value *V,
std::vector<Instruction*> &Worklist,
Loop *L, LPPassManager *LPM) {
- DEBUG(dbgs() << "Replace with '" << *V << "': " << *I);
+ DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n");
// Add uses to the worklist, which may be dead now.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
@@ -1275,7 +1276,8 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V,
LPM->deleteSimpleAnalysisValue(I, L);
RemoveFromWorklist(I, Worklist);
I->replaceAllUsesWith(V);
- I->eraseFromParent();
+ if (!I->mayHaveSideEffects())
+ I->eraseFromParent();
++NumSimplify;
}
@@ -1431,7 +1433,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// Simple DCE.
if (isInstructionTriviallyDead(I)) {
- DEBUG(dbgs() << "Remove dead instruction '" << *I);
+ DEBUG(dbgs() << "Remove dead instruction '" << *I << "\n");
// Add uses to the worklist, which may be dead now.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index a3f3f25c1e0f6..21a632073da7a 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1323,7 +1323,7 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) {
// Get the alignment of the byval. If the call doesn't specify the alignment,
// then it is some target specific value that we can't know.
- unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
+ unsigned ByValAlign = CS.getParamAlignment(ArgNo);
if (ByValAlign == 0) return false;
// If it is greater than the memcpy, then we check to see if we can force the
diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp
index c5bf2f28d1852..d0bfe36038973 100644
--- a/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -211,7 +211,8 @@ bool NaryReassociatePass::doOneIteration(Function &F) {
Changed = true;
SE->forgetValue(&*I);
I->replaceAllUsesWith(NewI);
- // If SeenExprs constains I's WeakVH, that entry will be replaced with
+      // If SeenExprs contains I's WeakTrackingVH, that entry will be
+ // replaced with
// nullptr.
RecursivelyDeleteTriviallyDeadInstructions(&*I, TLI);
I = NewI->getIterator();
@@ -219,7 +220,7 @@ bool NaryReassociatePass::doOneIteration(Function &F) {
// Add the rewritten instruction to SeenExprs; the original instruction
// is deleted.
const SCEV *NewSCEV = SE->getSCEV(&*I);
- SeenExprs[NewSCEV].push_back(WeakVH(&*I));
+ SeenExprs[NewSCEV].push_back(WeakTrackingVH(&*I));
// Ideally, NewSCEV should equal OldSCEV because tryReassociate(I)
// is equivalent to I. However, ScalarEvolution::getSCEV may
// weaken nsw causing NewSCEV not to equal OldSCEV. For example, suppose
@@ -239,7 +240,7 @@ bool NaryReassociatePass::doOneIteration(Function &F) {
//
// This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll.
if (NewSCEV != OldSCEV)
- SeenExprs[OldSCEV].push_back(WeakVH(&*I));
+ SeenExprs[OldSCEV].push_back(WeakTrackingVH(&*I));
}
}
}
@@ -494,7 +495,8 @@ NaryReassociatePass::findClosestMatchingDominator(const SCEV *CandidateExpr,
// future instruction either. Therefore, we pop it out of the stack. This
// optimization makes the algorithm O(n).
while (!Candidates.empty()) {
- // Candidates stores WeakVHs, so a candidate can be nullptr if it's removed
+ // Candidates stores WeakTrackingVHs, so a candidate can be nullptr if it's
+ // removed
// during rewriting.
if (Value *Candidate = Candidates.back()) {
Instruction *CandidateInstruction = cast<Instruction>(Candidate);
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index a014ddd9ba0a4..162d91beae764 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -395,7 +395,6 @@ namespace {
class NewGVN {
Function &F;
DominatorTree *DT;
- AssumptionCache *AC;
const TargetLibraryInfo *TLI;
AliasAnalysis *AA;
MemorySSA *MSSA;
@@ -405,6 +404,7 @@ class NewGVN {
BumpPtrAllocator ExpressionAllocator;
ArrayRecycler<Value *> ArgRecycler;
TarjanSCC SCCFinder;
+ const SimplifyQuery SQ;
// Number of function arguments, used by ranking
unsigned int NumFuncArgs;
@@ -504,8 +504,9 @@ public:
NewGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA,
const DataLayout &DL)
- : F(F), DT(DT), AC(AC), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL),
- PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)) {}
+ : F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL),
+ PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)), SQ(DL, TLI, DT, AC) {
+ }
bool runGVN();
private:
@@ -782,8 +783,7 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T,
E->op_push_back(lookupOperandLeader(Arg1));
E->op_push_back(lookupOperandLeader(Arg2));
- Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), DL, TLI,
- DT, AC);
+ Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), SQ);
if (const Expression *SimplifiedE = checkSimplificationResults(E, nullptr, V))
return SimplifiedE;
return E;
@@ -864,8 +864,8 @@ const Expression *NewGVN::createExpression(Instruction *I) {
"Wrong types on cmp instruction");
assert((E->getOperand(0)->getType() == I->getOperand(0)->getType() &&
E->getOperand(1)->getType() == I->getOperand(1)->getType()));
- Value *V = SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1),
- DL, TLI, DT, AC);
+ Value *V =
+ SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), SQ);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
} else if (isa<SelectInst>(I)) {
@@ -874,23 +874,23 @@ const Expression *NewGVN::createExpression(Instruction *I) {
assert(E->getOperand(1)->getType() == I->getOperand(1)->getType() &&
E->getOperand(2)->getType() == I->getOperand(2)->getType());
Value *V = SimplifySelectInst(E->getOperand(0), E->getOperand(1),
- E->getOperand(2), DL, TLI, DT, AC);
+ E->getOperand(2), SQ);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
}
} else if (I->isBinaryOp()) {
- Value *V = SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1),
- DL, TLI, DT, AC);
+ Value *V =
+ SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), SQ);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
} else if (auto *BI = dyn_cast<BitCastInst>(I)) {
- Value *V = SimplifyInstruction(BI, DL, TLI, DT, AC);
+ Value *V =
+ SimplifyCastInst(BI->getOpcode(), BI->getOperand(0), BI->getType(), SQ);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
} else if (isa<GetElementPtrInst>(I)) {
- Value *V = SimplifyGEPInst(E->getType(),
- ArrayRef<Value *>(E->op_begin(), E->op_end()),
- DL, TLI, DT, AC);
+ Value *V = SimplifyGEPInst(
+ E->getType(), ArrayRef<Value *>(E->op_begin(), E->op_end()), SQ);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
} else if (AllConstant) {
@@ -1628,15 +1628,15 @@ const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) {
if (PBranch->TrueEdge) {
// If we know the previous predicate is true and we are in the true
// edge then we may be implied true or false.
- if (CmpInst::isImpliedTrueByMatchingCmp(OurPredicate,
- BranchPredicate)) {
+ if (CmpInst::isImpliedTrueByMatchingCmp(BranchPredicate,
+ OurPredicate)) {
addPredicateUsers(PI, I);
return createConstantExpression(
ConstantInt::getTrue(CI->getType()));
}
- if (CmpInst::isImpliedFalseByMatchingCmp(OurPredicate,
- BranchPredicate)) {
+ if (CmpInst::isImpliedFalseByMatchingCmp(BranchPredicate,
+ OurPredicate)) {
addPredicateUsers(PI, I);
return createConstantExpression(
ConstantInt::getFalse(CI->getType()));
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 3dcab60907896..ef29d4141600a 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -982,7 +982,7 @@ static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
/// Emit a tree of add instructions, summing Ops together
/// and returning the result. Insert the tree before I.
static Value *EmitAddTreeOfValues(Instruction *I,
- SmallVectorImpl<WeakVH> &Ops){
+ SmallVectorImpl<WeakTrackingVH> &Ops) {
if (Ops.size() == 1) return Ops.back();
Value *V1 = Ops.back();
@@ -1559,7 +1559,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
: BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
- SmallVector<WeakVH, 4> NewMulOps;
+ SmallVector<WeakTrackingVH, 4> NewMulOps;
for (unsigned i = 0; i != Ops.size(); ++i) {
// Only try to remove factors from expressions we're allowed to.
BinaryOperator *BOp =
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index f344eb151464a..c11247c06b856 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -1128,39 +1128,23 @@ normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent,
// Create new attribute set containing only attributes which can be transferred
// from original call to the safepoint.
-static AttributeList legalizeCallAttributes(AttributeList AS) {
- AttributeList Ret;
-
- for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) {
- unsigned Index = AS.getSlotIndex(Slot);
-
- if (Index == AttributeList::ReturnIndex ||
- Index == AttributeList::FunctionIndex) {
-
- for (Attribute Attr : make_range(AS.begin(Slot), AS.end(Slot))) {
-
- // Do not allow certain attributes - just skip them
- // Safepoint can not be read only or read none.
- if (Attr.hasAttribute(Attribute::ReadNone) ||
- Attr.hasAttribute(Attribute::ReadOnly))
- continue;
-
- // These attributes control the generation of the gc.statepoint call /
- // invoke itself; and once the gc.statepoint is in place, they're of no
- // use.
- if (isStatepointDirectiveAttr(Attr))
- continue;
-
- Ret = Ret.addAttributes(
- AS.getContext(), Index,
- AttributeList::get(AS.getContext(), Index, AttrBuilder(Attr)));
- }
- }
-
- // Just skip parameter attributes for now
- }
-
- return Ret;
+static AttributeList legalizeCallAttributes(AttributeList AL) {
+ if (AL.isEmpty())
+ return AL;
+
+ // Remove the readonly, readnone, and statepoint function attributes.
+ AttrBuilder FnAttrs = AL.getFnAttributes();
+ FnAttrs.removeAttribute(Attribute::ReadNone);
+ FnAttrs.removeAttribute(Attribute::ReadOnly);
+ for (Attribute A : AL.getFnAttributes()) {
+ if (isStatepointDirectiveAttr(A))
+ FnAttrs.remove(A);
+ }
+
+ // Just skip parameter and return attributes for now
+ LLVMContext &Ctx = AL.getContext();
+ return AttributeList::get(Ctx, AttributeList::FunctionIndex,
+ AttributeSet::get(Ctx, FnAttrs));
}
/// Helper function to place all gc relocates necessary for the given
@@ -1402,13 +1386,10 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
Call->setCallingConv(ToReplace->getCallingConv());
// Currently we will fail on parameter attributes and on certain
- // function attributes.
- AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
- // In case if we can handle this set of attributes - set up function attrs
- // directly on statepoint and return attrs later for gc_result intrinsic.
- Call->setAttributes(AttributeList::get(Call->getContext(),
- AttributeList::FunctionIndex,
- NewAttrs.getFnAttributes()));
+  // function attributes. In case we can handle this set of attributes -
+ // set up function attrs directly on statepoint and return attrs later for
+ // gc_result intrinsic.
+ Call->setAttributes(legalizeCallAttributes(ToReplace->getAttributes()));
Token = Call;
@@ -1431,13 +1412,10 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
Invoke->setCallingConv(ToReplace->getCallingConv());
// Currently we will fail on parameter attributes and on certain
- // function attributes.
- AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
- // In case if we can handle this set of attributes - set up function attrs
- // directly on statepoint and return attrs later for gc_result intrinsic.
- Invoke->setAttributes(AttributeList::get(Invoke->getContext(),
- AttributeList::FunctionIndex,
- NewAttrs.getFnAttributes()));
+  // function attributes. In case we can handle this set of attributes -
+ // set up function attrs directly on statepoint and return attrs later for
+ // gc_result intrinsic.
+ Invoke->setAttributes(legalizeCallAttributes(ToReplace->getAttributes()));
Token = Invoke;
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index d01e91a7f2356..1d9beffaf06bf 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -25,6 +25,7 @@
#include "llvm/Transforms/Scalar/SROA.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -2186,8 +2187,8 @@ class llvm::sroa::AllocaSliceRewriter
Instruction *OldPtr;
// Track post-rewrite users which are PHI nodes and Selects.
- SmallPtrSetImpl<PHINode *> &PHIUsers;
- SmallPtrSetImpl<SelectInst *> &SelectUsers;
+ SmallSetVector<PHINode *, 8> &PHIUsers;
+ SmallSetVector<SelectInst *, 8> &SelectUsers;
// Utility IR builder, whose name prefix is setup for each visited use, and
// the insertion point is set to point to the user.
@@ -2199,8 +2200,8 @@ public:
uint64_t NewAllocaBeginOffset,
uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
VectorType *PromotableVecTy,
- SmallPtrSetImpl<PHINode *> &PHIUsers,
- SmallPtrSetImpl<SelectInst *> &SelectUsers)
+ SmallSetVector<PHINode *, 8> &PHIUsers,
+ SmallSetVector<SelectInst *, 8> &SelectUsers)
: DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
NewAllocaBeginOffset(NewAllocaBeginOffset),
NewAllocaEndOffset(NewAllocaEndOffset),
@@ -3880,8 +3881,8 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
// fact scheduled for promotion.
unsigned PPWOldSize = PostPromotionWorklist.size();
unsigned NumUses = 0;
- SmallPtrSet<PHINode *, 8> PHIUsers;
- SmallPtrSet<SelectInst *, 8> SelectUsers;
+ SmallSetVector<PHINode *, 8> PHIUsers;
+ SmallSetVector<SelectInst *, 8> SelectUsers;
AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
P.endOffset(), IsIntegerPromotable, VecTy,
@@ -3902,19 +3903,16 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
// Now that we've processed all the slices in the new partition, check if any
// PHIs or Selects would block promotion.
- for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
- E = PHIUsers.end();
- I != E; ++I)
- if (!isSafePHIToSpeculate(**I)) {
+ for (PHINode *PHI : PHIUsers)
+ if (!isSafePHIToSpeculate(*PHI)) {
Promotable = false;
PHIUsers.clear();
SelectUsers.clear();
break;
}
- for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
- E = SelectUsers.end();
- I != E; ++I)
- if (!isSafeSelectToSpeculate(**I)) {
+
+ for (SelectInst *Sel : SelectUsers)
+ if (!isSafeSelectToSpeculate(*Sel)) {
Promotable = false;
PHIUsers.clear();
SelectUsers.clear();
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 00e3c95f6f06d..52201d8f3e51a 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
@@ -83,6 +84,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeCFGSimplifyPassPass(Registry);
initializeLateCFGSimplifyPassPass(Registry);
initializeStructurizeCFGPass(Registry);
+ initializeSimpleLoopUnswitchLegacyPassPass(Registry);
initializeSinkingLegacyPassPass(Registry);
initializeTailCallElimPass(Registry);
initializeSeparateConstOffsetFromGEPPass(Registry);
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 4d594532c3651..cde659b9d189f 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1138,7 +1138,7 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
// Add I to DominatingExprs if it's an add/sub that can't sign overflow.
if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) ||
match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
- if (isKnownNotFullPoison(I)) {
+ if (programUndefinedIfFullPoison(I)) {
const SCEV *Key =
SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
DominatingExprs[Key].push_back(I);
diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
new file mode 100644
index 0000000000000..fb1b47c48276c
--- /dev/null
+++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -0,0 +1,626 @@
+//===-- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+#define DEBUG_TYPE "simple-loop-unswitch"
+
+using namespace llvm;
+
+STATISTIC(NumBranches, "Number of branches unswitched");
+STATISTIC(NumSwitches, "Number of switches unswitched");
+STATISTIC(NumTrivial, "Number of unswitches that are trivial");
+
+/// Replace all uses of the loop-invariant condition \p LIC that occur inside
+/// loop \p L with \p Replacement. Uses outside the loop are left untouched.
+static void replaceLoopUsesWithConstant(Loop &L, Value &LIC,
+                                        Constant &Replacement) {
+  assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
+
+  // Replace uses of LIC in the loop with the given constant.
+  for (auto UI = LIC.use_begin(), UE = LIC.use_end(); UI != UE;) {
+    // Grab the use and walk past it so we can clobber it in the use list.
+    Use *U = &*UI++;
+    Instruction *UserI = dyn_cast<Instruction>(U->getUser());
+    // Skip users that are not instructions or that live outside the loop.
+    if (!UserI || !L.contains(UserI))
+      continue;
+
+    // Replace this use within the loop body.
+    *U = &Replacement;
+  }
+}
+
+/// Update the dominator tree after removing one exiting predecessor of a loop
+/// exit block.
+///
+/// The new immediate dominator is the nearest common dominator of the
+/// remaining predecessors, capped at the loop header (which dominates every
+/// loop exit block when the loop is in simplified form).
+static void updateLoopExitIDom(BasicBlock *LoopExitBB, Loop &L,
+                               DominatorTree &DT) {
+  assert(pred_begin(LoopExitBB) != pred_end(LoopExitBB) &&
+         "Cannot have empty predecessors of the loop exit block if we split "
+         "off a block to unswitch!");
+
+  // Seed with the first predecessor and fold in the rest below.
+  BasicBlock *IDom = *pred_begin(LoopExitBB);
+  // Walk all of the other predecessors finding the nearest common dominator
+  // until all predecessors are covered or we reach the loop header. The loop
+  // header necessarily dominates all loop exit blocks in loop simplified form
+  // so we can early-exit the moment we hit that block.
+  for (auto PI = std::next(pred_begin(LoopExitBB)), PE = pred_end(LoopExitBB);
+       PI != PE && IDom != L.getHeader(); ++PI)
+    IDom = DT.findNearestCommonDominator(IDom, *PI);
+
+  DT.changeImmediateDominator(LoopExitBB, IDom);
+}
+
+/// Update the dominator tree after unswitching a particular former exit block.
+///
+/// This handles the full update of the dominator tree after hoisting a block
+/// that previously was an exit block (or split off of an exit block) up to be
+/// reached from the new immediate dominator of the preheader.
+///
+/// The common case is simple -- we just move the unswitched block to have an
+/// immediate dominator of the old preheader. But in complex cases, there may
+/// be other blocks reachable from the unswitched block that are immediately
+/// dominated by some node between the unswitched one and the old preheader.
+/// All of these also need to be hoisted in the dominator tree. We also want to
+/// minimize queries to the dominator tree because each step of this
+/// invalidates any DFS numbers that would make queries fast.
+static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH,
+                                  DominatorTree &DT) {
+  DomTreeNode *OldPHNode = DT[OldPH];
+  DomTreeNode *UnswitchedNode = DT[UnswitchedBB];
+  // If the dominator tree has already been updated for this unswitched node,
+  // we're done. This makes it easier to use this routine if there are multiple
+  // paths to the same unswitched destination.
+  if (UnswitchedNode->getIDom() == OldPHNode)
+    return;
+
+  // First collect the domtree nodes that we are hoisting over. These are the
+  // set of nodes which may have children that need to be hoisted as well.
+  SmallPtrSet<DomTreeNode *, 4> DomChain;
+  for (auto *IDom = UnswitchedNode->getIDom(); IDom != OldPHNode;
+       IDom = IDom->getIDom())
+    DomChain.insert(IDom);
+
+  // The unswitched block ends up immediately dominated by the old preheader --
+  // regardless of whether it is the loop exit block or split off of the loop
+  // exit block.
+  DT.changeImmediateDominator(UnswitchedNode, OldPHNode);
+
+  // Blocks reachable from the unswitched block may need to change their IDom
+  // as well.
+  SmallSetVector<BasicBlock *, 4> Worklist;
+  for (auto *SuccBB : successors(UnswitchedBB))
+    Worklist.insert(SuccBB);
+
+  // Walk the worklist. We grow the list in the loop and so must recompute size.
+  for (int i = 0; i < (int)Worklist.size(); ++i) {
+    auto *BB = Worklist[i];
+
+    DomTreeNode *Node = DT[BB];
+    assert(!DomChain.count(Node) &&
+           "Cannot be dominated by a block you can reach!");
+    // If this block doesn't have an immediate dominator somewhere in the chain
+    // we hoisted over, then its position in the domtree hasn't changed. Either
+    // it is above the region hoisted and still valid, or it is below the
+    // hoisted block and so was trivially updated. This also applies to
+    // everything reachable from this block so we're completely done with it.
+    if (!DomChain.count(Node->getIDom()))
+      continue;
+
+    // We need to change the IDom for this node but also walk its successors
+    // which could have similar dominance position.
+    DT.changeImmediateDominator(Node, OldPHNode);
+    for (auto *SuccBB : successors(BB))
+      Worklist.insert(SuccBB);
+  }
+}
+
+/// Unswitch a trivial branch if the condition is loop invariant.
+///
+/// This routine should only be called when loop code leading to the branch has
+/// been validated as trivial (no side effects). This routine checks if the
+/// condition is invariant and one of the successors is a loop exit. This
+/// allows us to unswitch without duplicating the loop, making it trivial.
+///
+/// If this routine fails to unswitch the branch it returns false.
+///
+/// If the branch can be unswitched, this routine splits the preheader and
+/// hoists the branch above that split. Preserves loop simplified form
+/// (splitting the exit block as necessary). It simplifies the branch within
+/// the loop to an unconditional branch but doesn't remove it entirely. Further
+/// cleanup can be done with some simplify-cfg like pass.
+static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
+                                  LoopInfo &LI) {
+  assert(BI.isConditional() && "Can only unswitch a conditional branch!");
+  DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
+
+  Value *LoopCond = BI.getCondition();
+
+  // Need a trivial loop condition to unswitch.
+  if (!L.isLoopInvariant(LoopCond))
+    return false;
+
+  // FIXME: We should compute this once at the start and update it!
+  SmallVector<BasicBlock *, 16> ExitBlocks;
+  L.getExitBlocks(ExitBlocks);
+  SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(),
+                                             ExitBlocks.end());
+
+  // Check to see if a successor of the branch is guaranteed to
+  // exit through a unique exit block without having any
+  // side-effects. If so, determine the value of Cond that causes
+  // it to do this.
+  ConstantInt *CondVal = ConstantInt::getTrue(BI.getContext());
+  ConstantInt *Replacement = ConstantInt::getFalse(BI.getContext());
+  int LoopExitSuccIdx = 0;
+  auto *LoopExitBB = BI.getSuccessor(0);
+  if (!ExitBlockSet.count(LoopExitBB)) {
+    // Successor 0 stays in the loop; try successor 1 as the exit and flip the
+    // condition value that takes the exit.
+    std::swap(CondVal, Replacement);
+    LoopExitSuccIdx = 1;
+    LoopExitBB = BI.getSuccessor(1);
+    if (!ExitBlockSet.count(LoopExitBB))
+      return false;
+  }
+  auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
+  assert(L.contains(ContinueBB) &&
+         "Cannot have both successors exit and still be in the loop!");
+
+  // If the loop exit block contains phi nodes, this isn't trivial.
+  // FIXME: We should examine the PHI to determine whether or not we can handle
+  // it trivially.
+  if (isa<PHINode>(LoopExitBB->begin()))
+    return false;
+
+  DEBUG(dbgs() << " unswitching trivial branch when: " << CondVal
+               << " == " << LoopCond << "\n");
+
+  // Split the preheader, so that we know that there is a safe place to insert
+  // the conditional branch. We will change the preheader to have a conditional
+  // branch on LoopCond.
+  BasicBlock *OldPH = L.getLoopPreheader();
+  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
+
+  // Now that we have a place to insert the conditional branch, create a place
+  // to branch to: this is the exit block out of the loop that we are
+  // unswitching. We need to split this if there are other loop predecessors.
+  // Because the loop is in simplified form, *any* other predecessor is enough.
+  BasicBlock *UnswitchedBB;
+  if (BasicBlock *PredBB = LoopExitBB->getUniquePredecessor()) {
+    (void)PredBB;
+    assert(PredBB == BI.getParent() &&
+           "A branch's parent isn't a predecessor!");
+    UnswitchedBB = LoopExitBB;
+  } else {
+    UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI);
+  }
+
+  BasicBlock *ParentBB = BI.getParent();
+
+  // Now splice the branch to gate reaching the new preheader and re-point its
+  // successors.
+  OldPH->getInstList().splice(std::prev(OldPH->end()),
+                              BI.getParent()->getInstList(), BI);
+  OldPH->getTerminator()->eraseFromParent();
+  BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
+  BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
+
+  // Create a new unconditional branch that will continue the loop as a new
+  // terminator.
+  BranchInst::Create(ContinueBB, ParentBB);
+
+  // Now we need to update the dominator tree.
+  updateDTAfterUnswitch(UnswitchedBB, OldPH, DT);
+  // But if we split something off of the loop exit block then we also removed
+  // one of the predecessors for the loop exit block and may need to update its
+  // idom.
+  if (UnswitchedBB != LoopExitBB)
+    updateLoopExitIDom(LoopExitBB, L, DT);
+
+  // Since this is an i1 condition we can also trivially replace uses of it
+  // within the loop with a constant.
+  replaceLoopUsesWithConstant(L, *LoopCond, *Replacement);
+
+  ++NumTrivial;
+  ++NumBranches;
+  return true;
+}
+
+/// Unswitch a trivial switch if the condition is loop invariant.
+///
+/// This routine should only be called when loop code leading to the switch has
+/// been validated as trivial (no side effects). This routine checks if the
+/// condition is invariant and that at least one of the successors is a loop
+/// exit. This allows us to unswitch without duplicating the loop, making it
+/// trivial.
+///
+/// If this routine fails to unswitch the switch it returns false.
+///
+/// If the switch can be unswitched, this routine splits the preheader and
+/// copies the switch above that split. If the default case is one of the
+/// exiting cases, it copies the non-exiting cases and points them at the new
+/// preheader. If the default case is not exiting, it copies the exiting cases
+/// and points the default at the preheader. It preserves loop simplified form
+/// (splitting the exit blocks as necessary). It simplifies the switch within
+/// the loop by removing now-dead cases. If the default case is one of those
+/// unswitched, it replaces its destination with a new basic block containing
+/// only unreachable. Such basic blocks, while technically loop exits, are not
+/// considered for unswitching so this is a stable transform and the same
+/// switch will not be revisited. If after unswitching there is only a single
+/// in-loop successor, the switch is further simplified to an unconditional
+/// branch. Still more cleanup can be done with some simplify-cfg like pass.
+static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
+                                  LoopInfo &LI) {
+  DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
+  Value *LoopCond = SI.getCondition();
+
+  // If this isn't switching on an invariant condition, we can't unswitch it.
+  if (!L.isLoopInvariant(LoopCond))
+    return false;
+
+  // FIXME: We should compute this once at the start and update it!
+  SmallVector<BasicBlock *, 16> ExitBlocks;
+  L.getExitBlocks(ExitBlocks);
+  SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(),
+                                             ExitBlocks.end());
+
+  // Collect the cases that branch directly out of the loop. Cases whose exit
+  // block starts with a PHI are skipped -- they are not handled trivially.
+  SmallVector<int, 4> ExitCaseIndices;
+  for (auto Case : SI.cases()) {
+    auto *SuccBB = Case.getCaseSuccessor();
+    if (ExitBlockSet.count(SuccBB) && !isa<PHINode>(SuccBB->begin()))
+      ExitCaseIndices.push_back(Case.getCaseIndex());
+  }
+  // The default is only unswitchable if it exits, has no PHIs, and isn't the
+  // unreachable block we ourselves create below (keeps this transform stable).
+  BasicBlock *DefaultExitBB = nullptr;
+  if (ExitBlockSet.count(SI.getDefaultDest()) &&
+      !isa<PHINode>(SI.getDefaultDest()->begin()) &&
+      !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator()))
+    DefaultExitBB = SI.getDefaultDest();
+  else if (ExitCaseIndices.empty())
+    return false;
+
+  DEBUG(dbgs() << " unswitching trivial cases...\n");
+
+  SmallVector<std::pair<ConstantInt *, BasicBlock *>, 4> ExitCases;
+  ExitCases.reserve(ExitCaseIndices.size());
+  // We walk the case indices backwards so that we remove the last case first
+  // and don't disrupt the earlier indices.
+  for (unsigned Index : reverse(ExitCaseIndices)) {
+    auto CaseI = SI.case_begin() + Index;
+    // Save the value of this case.
+    ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()});
+    // Delete the unswitched cases.
+    SI.removeCase(CaseI);
+  }
+
+  // Check if after this all of the remaining cases point at the same
+  // successor.
+  BasicBlock *CommonSuccBB = nullptr;
+  if (SI.getNumCases() > 0 &&
+      std::all_of(std::next(SI.case_begin()), SI.case_end(),
+                  [&SI](const SwitchInst::CaseHandle &Case) {
+                    return Case.getCaseSuccessor() ==
+                           SI.case_begin()->getCaseSuccessor();
+                  }))
+    CommonSuccBB = SI.case_begin()->getCaseSuccessor();
+
+  if (DefaultExitBB) {
+    // We can't remove the default edge so replace it with an edge to either
+    // the single common remaining successor (if we have one) or an unreachable
+    // block.
+    if (CommonSuccBB) {
+      SI.setDefaultDest(CommonSuccBB);
+    } else {
+      BasicBlock *ParentBB = SI.getParent();
+      BasicBlock *UnreachableBB = BasicBlock::Create(
+          ParentBB->getContext(),
+          Twine(ParentBB->getName()) + ".unreachable_default",
+          ParentBB->getParent());
+      new UnreachableInst(ParentBB->getContext(), UnreachableBB);
+      SI.setDefaultDest(UnreachableBB);
+      DT.addNewBlock(UnreachableBB, ParentBB);
+    }
+  } else {
+    // If we're not unswitching the default, we need it to match any cases to
+    // have a common successor or if we have no cases it is the common
+    // successor.
+    if (SI.getNumCases() == 0)
+      CommonSuccBB = SI.getDefaultDest();
+    else if (SI.getDefaultDest() != CommonSuccBB)
+      CommonSuccBB = nullptr;
+  }
+
+  // Split the preheader, so that we know that there is a safe place to insert
+  // the switch.
+  BasicBlock *OldPH = L.getLoopPreheader();
+  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
+  OldPH->getTerminator()->eraseFromParent();
+
+  // Now add the unswitched switch.
+  auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
+
+  // Split any exit blocks with remaining in-loop predecessors. We walk in
+  // reverse so that we split in the same order as the cases appeared. This is
+  // purely for convenience of reading the resulting IR, but it doesn't cost
+  // anything really.
+  SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
+  // Handle the default exit if necessary.
+  // FIXME: It'd be great if we could merge this with the loop below but LLVM's
+  // ranges aren't quite powerful enough yet.
+  if (DefaultExitBB && !pred_empty(DefaultExitBB)) {
+    auto *SplitBB =
+        SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI);
+    updateLoopExitIDom(DefaultExitBB, L, DT);
+    DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
+  }
+  // Note that we must use a reference in the for loop so that we update the
+  // container.
+  for (auto &CasePair : reverse(ExitCases)) {
+    // Grab a reference to the exit block in the pair so that we can update it.
+    BasicBlock *&ExitBB = CasePair.second;
+
+    // If this case is the last edge into the exit block, we can simply reuse it
+    // as it will no longer be a loop exit. No mapping necessary.
+    if (pred_empty(ExitBB))
+      continue;
+
+    // Otherwise we need to split the exit block so that we retain an exit
+    // block from the loop and a target for the unswitched condition.
+    BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
+    if (!SplitExitBB) {
+      // If this is the first time we see this, do the split and remember it.
+      SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI);
+      updateLoopExitIDom(ExitBB, L, DT);
+    }
+    ExitBB = SplitExitBB;
+  }
+
+  // Now add the unswitched cases. We do this in reverse order as we built them
+  // in reverse order.
+  for (auto CasePair : reverse(ExitCases)) {
+    ConstantInt *CaseVal = CasePair.first;
+    BasicBlock *UnswitchedBB = CasePair.second;
+
+    NewSI->addCase(CaseVal, UnswitchedBB);
+    updateDTAfterUnswitch(UnswitchedBB, OldPH, DT);
+  }
+
+  // If the default was unswitched, re-point it and add explicit cases for
+  // entering the loop.
+  if (DefaultExitBB) {
+    NewSI->setDefaultDest(DefaultExitBB);
+    updateDTAfterUnswitch(DefaultExitBB, OldPH, DT);
+
+    // We removed all the exit cases, so we just copy the cases to the
+    // unswitched switch.
+    for (auto Case : SI.cases())
+      NewSI->addCase(Case.getCaseValue(), NewPH);
+  }
+
+  // If we ended up with a common successor for every path through the switch
+  // after unswitching, rewrite it to an unconditional branch to make it easy
+  // to recognize. Otherwise we potentially have to recognize the default case
+  // pointing at unreachable and other complexity.
+  if (CommonSuccBB) {
+    BasicBlock *BB = SI.getParent();
+    SI.eraseFromParent();
+    BranchInst::Create(CommonSuccBB, BB);
+  }
+
+  // NOTE(review): this verification runs unconditionally, even in release
+  // builds, and can be expensive; the other call sites in this file guard it
+  // with #ifndef NDEBUG -- confirm whether that was intended here.
+  DT.verifyDomTree();
+  ++NumTrivial;
+  ++NumSwitches;
+  return true;
+}
+
+/// This routine scans the loop to find a branch or switch which occurs before
+/// any side effects occur. These can potentially be unswitched without
+/// duplicating the loop. If a branch or switch is successfully unswitched the
+/// scanning continues to see if subsequent branches or switches have become
+/// trivial. Once all trivial candidates have been unswitched, this routine
+/// returns.
+///
+/// The return value indicates whether anything was unswitched (and therefore
+/// changed).
+static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
+                                         LoopInfo &LI) {
+  bool Changed = false;
+
+  // If loop header has only one reachable successor we should keep looking for
+  // trivial condition candidates in the successor as well. An alternative is
+  // to constant fold conditions and merge successors into loop header (then we
+  // only need to check header's terminator). The reason for not doing this in
+  // LoopUnswitch pass is that it could potentially break LoopPassManager's
+  // invariants. Folding dead branches could either eliminate the current loop
+  // or make other loops unreachable. LCSSA form might also not be preserved
+  // after deleting branches. The following code keeps traversing loop header's
+  // successors until it finds the trivial condition candidate (condition that
+  // is not a constant). Since unswitching generates branches with constant
+  // conditions, this scenario could be very common in practice.
+  BasicBlock *CurrentBB = L.getHeader();
+  SmallPtrSet<BasicBlock *, 8> Visited;
+  Visited.insert(CurrentBB);
+  do {
+    // Check if there are any side-effecting instructions (e.g. stores, calls,
+    // volatile loads) in the part of the loop that the code *would* execute
+    // without unswitching.
+    if (llvm::any_of(*CurrentBB,
+                     [](Instruction &I) { return I.mayHaveSideEffects(); }))
+      return Changed;
+
+    TerminatorInst *CurrentTerm = CurrentBB->getTerminator();
+
+    if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
+      // Don't bother trying to unswitch past a switch with a constant
+      // condition. This should be removed prior to running this pass by
+      // simplify-cfg.
+      if (isa<Constant>(SI->getCondition()))
+        return Changed;
+
+      if (!unswitchTrivialSwitch(L, *SI, DT, LI))
+        // Couldn't unswitch this one so we're done.
+        return Changed;
+
+      // Mark that we managed to unswitch something.
+      Changed = true;
+
+      // If unswitching turned the terminator into an unconditional branch then
+      // we can continue. The unswitching logic specifically works to fold any
+      // cases it can into an unconditional branch to make it easier to
+      // recognize here.
+      auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
+      if (!BI || BI->isConditional())
+        return Changed;
+
+      CurrentBB = BI->getSuccessor(0);
+      continue;
+    }
+
+    auto *BI = dyn_cast<BranchInst>(CurrentTerm);
+    if (!BI)
+      // We do not understand other terminator instructions.
+      return Changed;
+
+    // Don't bother trying to unswitch past an unconditional branch or a branch
+    // with a constant value. These should be removed by simplify-cfg prior to
+    // running this pass.
+    if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
+      return Changed;
+
+    // Found a trivial condition candidate: non-foldable conditional branch. If
+    // we fail to unswitch this, we can't do anything else that is trivial.
+    if (!unswitchTrivialBranch(L, *BI, DT, LI))
+      return Changed;
+
+    // Mark that we managed to unswitch something.
+    Changed = true;
+
+    // We unswitched the branch. This should always leave us with an
+    // unconditional branch that we can follow now.
+    BI = cast<BranchInst>(CurrentBB->getTerminator());
+    assert(!BI->isConditional() &&
+           "Cannot form a conditional branch by unswitching!");
+    CurrentBB = BI->getSuccessor(0);
+
+    // When continuing, if we exit the loop or reach a previous visited block,
+    // then we can not reach any trivial condition candidates (unfoldable
+    // branch instructions or switch instructions) and no unswitch can happen.
+  } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
+
+  return Changed;
+}
+
+/// Unswitch control flow predicated on loop invariant conditions.
+///
+/// This first hoists all branches or switches which are trivial (IE, do not
+/// require duplicating any part of the loop) out of the loop body. It then
+/// looks at other loop invariant control flows and tries to unswitch those as
+/// well by cloning the loop if the result is small enough.
+static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
+                         AssumptionCache &AC) {
+  assert(L.isLCSSAForm(DT) &&
+         "Loops must be in LCSSA form before unswitching.");
+  bool Changed = false;
+
+  // Must be in loop simplified form: we need a preheader and dedicated exits.
+  if (!L.isLoopSimplifyForm())
+    return false;
+
+  // Try trivial unswitch first before loop over other basic blocks in the loop.
+  Changed |= unswitchAllTrivialConditions(L, DT, LI);
+
+  // FIXME: Add support for non-trivial unswitching by cloning the loop.
+  // NOTE(review): AC is not used by the trivial path above; presumably it is
+  // plumbed through for the future non-trivial implementation -- confirm.
+
+  return Changed;
+}
+
+/// New pass manager entry point: run unswitching on \p L and report which
+/// analyses are preserved.
+PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
+                                              LoopStandardAnalysisResults &AR,
+                                              LPMUpdater &U) {
+  Function &F = *L.getHeader()->getParent();
+  // F is only referenced inside DEBUG; silence the unused-variable warning in
+  // release builds.
+  (void)F;
+
+  DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L << "\n");
+
+  if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC))
+    return PreservedAnalyses::all();
+
+#ifndef NDEBUG
+  // Historically this pass has had issues with the dominator tree so verify it
+  // in asserts builds.
+  AR.DT.verifyDomTree();
+#endif
+  return getLoopPassPreservedAnalyses();
+}
+
+namespace {
+/// Legacy pass manager wrapper that drives the simple loop unswitch logic
+/// from runOnLoop.
+class SimpleLoopUnswitchLegacyPass : public LoopPass {
+public:
+  static char ID; // Pass ID, replacement for typeid
+  explicit SimpleLoopUnswitchLegacyPass() : LoopPass(ID) {
+    initializeSimpleLoopUnswitchLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<AssumptionCacheTracker>();
+    getLoopAnalysisUsage(AU);
+  }
+};
+} // namespace
+
+/// Legacy pass manager entry point: gather the required analyses and run
+/// unswitching on \p L.
+bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
+  if (skipLoop(L))
+    return false;
+
+  Function &F = *L->getHeader()->getParent();
+
+  DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L << "\n");
+
+  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+
+  bool Changed = unswitchLoop(*L, DT, LI, AC);
+
+#ifndef NDEBUG
+  // Historically this pass has had issues with the dominator tree so verify it
+  // in asserts builds.
+  DT.verifyDomTree();
+#endif
+  return Changed;
+}
+
+char SimpleLoopUnswitchLegacyPass::ID = 0;
+// Register the pass and its analysis dependencies with the legacy pass
+// registry under the "simple-loop-unswitch" command-line name.
+INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
+                      "Simple unswitch loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
+                    "Simple unswitch loops", false, false)
+
+// Factory used by legacy pass manager pipelines to create this pass.
+Pass *llvm::createSimpleLoopUnswitchLegacyPass() {
+  return new SimpleLoopUnswitchLegacyPass();
+}
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 22af21d55c019..3d5cbfc93f2e6 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -78,8 +78,8 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
// Recursively deleting a PHI may cause multiple PHIs to be deleted
- // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete.
- SmallVector<WeakVH, 8> PHIs;
+ // or RAUW'd undef, so use an array of WeakTrackingVH for the PHIs to delete.
+ SmallVector<WeakTrackingVH, 8> PHIs;
for (BasicBlock::iterator I = BB->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I)
PHIs.push_back(PN);
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 385c12302e040..d5124ac89016f 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -245,7 +245,7 @@ namespace {
void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
BasicBlock::const_iterator StartingInst,
std::vector<const BasicBlock*> &ToClone){
- WeakVH &BBEntry = VMap[BB];
+ WeakTrackingVH &BBEntry = VMap[BB];
// Have we already cloned this block?
if (BBEntry) return;
@@ -547,7 +547,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// Make a second pass over the PHINodes now that all of them have been
// remapped into the new function, simplifying the PHINode and performing any
// recursive simplifications exposed. This will transparently update the
- // WeakVH in the VMap. Notably, we rely on that so that if we coalesce
+ // WeakTrackingVH in the VMap. Notably, we rely on that so that if we coalesce
// two PHINodes, the iteration over the old PHIs remains valid, and the
// mapping will just map us to the new node (which may not even be a PHI
// node).
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 5d6fbc3325fff..6d56e08af99f9 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -1640,7 +1640,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// modify the struct.
if (CS.isByValArgument(ArgNo)) {
ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
- CalledFunc->getParamAlignment(ArgNo+1));
+ CalledFunc->getParamAlignment(ArgNo));
if (ActualArg != *AI)
ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));
}
@@ -2302,7 +2302,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
AssumptionCache *AC =
IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr;
auto &DL = Caller->getParent()->getDataLayout();
- if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr, AC)) {
+ if (Value *V = SimplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) {
PHI->replaceAllUsesWith(V);
PHI->eraseFromParent();
}
diff --git a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index fe93d6927c638..42aca757c2afd 100644
--- a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -33,6 +33,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
@@ -48,16 +49,6 @@ using namespace llvm;
STATISTIC(NumWrappedOneCond, "Number of One-Condition Wrappers Inserted");
STATISTIC(NumWrappedTwoCond, "Number of Two-Condition Wrappers Inserted");
-static cl::opt<bool> LibCallsShrinkWrapDoDomainError(
- "libcalls-shrinkwrap-domain-error", cl::init(true), cl::Hidden,
- cl::desc("Perform shrink-wrap on lib calls with domain errors"));
-static cl::opt<bool> LibCallsShrinkWrapDoRangeError(
- "libcalls-shrinkwrap-range-error", cl::init(true), cl::Hidden,
- cl::desc("Perform shrink-wrap on lib calls with range errors"));
-static cl::opt<bool> LibCallsShrinkWrapDoPoleError(
- "libcalls-shrinkwrap-pole-error", cl::init(true), cl::Hidden,
- cl::desc("Perform shrink-wrap on lib calls with pole errors"));
-
namespace {
class LibCallsShrinkWrapLegacyPass : public FunctionPass {
public:
@@ -82,10 +73,11 @@ INITIALIZE_PASS_END(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap",
namespace {
class LibCallsShrinkWrap : public InstVisitor<LibCallsShrinkWrap> {
public:
- LibCallsShrinkWrap(const TargetLibraryInfo &TLI) : TLI(TLI), Changed(false){};
- bool isChanged() const { return Changed; }
+ LibCallsShrinkWrap(const TargetLibraryInfo &TLI, DominatorTree *DT)
+ : TLI(TLI), DT(DT){};
void visitCallInst(CallInst &CI) { checkCandidate(CI); }
- void perform() {
+ bool perform() {
+ bool Changed = false;
for (auto &CI : WorkList) {
DEBUG(dbgs() << "CDCE calls: " << CI->getCalledFunction()->getName()
<< "\n");
@@ -94,6 +86,7 @@ public:
DEBUG(dbgs() << "Transformed\n");
}
}
+ return Changed;
}
private:
@@ -134,8 +127,8 @@ private:
}
const TargetLibraryInfo &TLI;
+ DominatorTree *DT;
SmallVector<CallInst *, 16> WorkList;
- bool Changed;
};
} // end anonymous namespace
@@ -241,8 +234,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,
case LibFunc_atanhf: // Same as atanh
case LibFunc_atanhl: // Same as atanh
{
- if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError)
- return false;
++NumWrappedTwoCond;
Cond = createOrCond(CI, CmpInst::FCMP_OLE, -1.0f, CmpInst::FCMP_OGE, 1.0f);
break;
@@ -262,8 +253,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,
case LibFunc_logbf: // Same as log
case LibFunc_logbl: // Same as log
{
- if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError)
- return false;
++NumWrappedOneCond;
Cond = createCond(CI, CmpInst::FCMP_OLE, 0.0f);
break;
@@ -274,8 +263,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,
case LibFunc_log1pf: // Same as log1p
case LibFunc_log1pl: // Same as log1p
{
- if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError)
- return false;
++NumWrappedOneCond;
Cond = createCond(CI, CmpInst::FCMP_OLE, -1.0f);
break;
@@ -285,9 +272,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,
// RangeError: overflow or underflow
case LibFunc_powf:
case LibFunc_powl: {
- if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError ||
- !LibCallsShrinkWrapDoRangeError)
- return false;
Cond = generateCondForPow(CI, Func);
if (Cond == nullptr)
return false;
@@ -346,7 +330,7 @@ Value *LibCallsShrinkWrap::generateOneRangeCond(CallInst *CI,
UpperBound = 11356.0f;
break;
default:
- llvm_unreachable("Should be reach here");
+ llvm_unreachable("Unhandled library call!");
}
++NumWrappedOneCond;
@@ -410,7 +394,7 @@ Value *LibCallsShrinkWrap::generateTwoRangeCond(CallInst *CI,
UpperBound = 11383.0f;
break;
default:
- llvm_unreachable("Should be reach here");
+ llvm_unreachable("Unhandled library call!");
}
++NumWrappedTwoCond;
@@ -499,14 +483,17 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,
// Wrap conditions that can potentially generate errno to the library call.
void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) {
- assert(Cond != nullptr && "hrinkWrapCI is not expecting an empty call inst");
+ assert(Cond != nullptr && "ShrinkWrapCI is not expecting an empty call inst");
MDNode *BranchWeights =
MDBuilder(CI->getContext()).createBranchWeights(1, 2000);
+
TerminatorInst *NewInst =
- SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights);
+ SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights, DT);
BasicBlock *CallBB = NewInst->getParent();
CallBB->setName("cdce.call");
- CallBB->getSingleSuccessor()->setName("cdce.end");
+ BasicBlock *SuccBB = CallBB->getSingleSuccessor();
+ assert(SuccBB && "The split block should have a single successor");
+ SuccBB->setName("cdce.end");
CI->removeFromParent();
CallBB->getInstList().insert(CallBB->getFirstInsertionPt(), CI);
DEBUG(dbgs() << "== Basic Block After ==");
@@ -522,32 +509,38 @@ bool LibCallsShrinkWrap::perform(CallInst *CI) {
TLI.getLibFunc(*Callee, Func);
assert(Func && "perform() is not expecting an empty function");
- if (LibCallsShrinkWrapDoDomainError && performCallDomainErrorOnly(CI, Func))
- return true;
-
- if (LibCallsShrinkWrapDoRangeError && performCallRangeErrorOnly(CI, Func))
+ if (performCallDomainErrorOnly(CI, Func) || performCallRangeErrorOnly(CI, Func))
return true;
-
return performCallErrors(CI, Func);
}
void LibCallsShrinkWrapLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
-static bool runImpl(Function &F, const TargetLibraryInfo &TLI) {
+static bool runImpl(Function &F, const TargetLibraryInfo &TLI,
+ DominatorTree *DT) {
if (F.hasFnAttribute(Attribute::OptimizeForSize))
return false;
- LibCallsShrinkWrap CCDCE(TLI);
+ LibCallsShrinkWrap CCDCE(TLI, DT);
CCDCE.visit(F);
- CCDCE.perform();
- return CCDCE.isChanged();
+ bool Changed = CCDCE.perform();
+
+// Verify the dominator tree after we've updated it locally.
+#ifndef NDEBUG
+ if (DT)
+ DT->verifyDomTree();
+#endif
+ return Changed;
}
bool LibCallsShrinkWrapLegacyPass::runOnFunction(Function &F) {
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- return runImpl(F, TLI);
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ return runImpl(F, TLI, DT);
}
namespace llvm {
@@ -561,11 +554,12 @@ FunctionPass *createLibCallsShrinkWrapPass() {
PreservedAnalyses LibCallsShrinkWrapPass::run(Function &F,
FunctionAnalysisManager &FAM) {
auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
- bool Changed = runImpl(F, TLI);
- if (!Changed)
+ auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
+ if (!runImpl(F, TLI, DT))
return PreservedAnalyses::all();
auto PA = PreservedAnalyses();
PA.preserve<GlobalsAA>();
+ PA.preserve<DominatorTreeAnalysis>();
return PA;
}
}
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index d3002c5fb7502..ce6b703f3528d 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -562,7 +562,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) {
// that can be removed.
BB->removePredecessor(Pred, true);
- WeakVH PhiIt = &BB->front();
+ WeakTrackingVH PhiIt = &BB->front();
while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
Value *OldPhiIt = PhiIt;
@@ -1259,49 +1259,6 @@ void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {
DbgValues.push_back(DVI);
}
-static void appendOffset(SmallVectorImpl<uint64_t> &Ops, int64_t Offset) {
- if (Offset > 0) {
- Ops.push_back(dwarf::DW_OP_plus);
- Ops.push_back(Offset);
- } else if (Offset < 0) {
- Ops.push_back(dwarf::DW_OP_minus);
- Ops.push_back(-Offset);
- }
-}
-
-enum { WithStackValue = true };
-
-/// Prepend \p DIExpr with a deref and offset operation and optionally turn it
-/// into a stack value.
-static DIExpression *prependDIExpr(DIBuilder &Builder, DIExpression *DIExpr,
- bool Deref, int64_t Offset = 0,
- bool StackValue = false) {
- if (!Deref && !Offset && !StackValue)
- return DIExpr;
-
- SmallVector<uint64_t, 8> Ops;
- appendOffset(Ops, Offset);
- if (Deref)
- Ops.push_back(dwarf::DW_OP_deref);
- if (DIExpr)
- for (auto Op : DIExpr->expr_ops()) {
- // A DW_OP_stack_value comes at the end, but before a DW_OP_LLVM_fragment.
- if (StackValue) {
- if (Op.getOp() == dwarf::DW_OP_stack_value)
- StackValue = false;
- else if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) {
- Ops.push_back(dwarf::DW_OP_stack_value);
- StackValue = false;
- }
- }
- Ops.push_back(Op.getOp());
- for (unsigned I = 0; I < Op.getNumArgs(); ++I)
- Ops.push_back(Op.getArg(I));
- }
- if (StackValue)
- Ops.push_back(dwarf::DW_OP_stack_value);
- return Builder.createExpression(Ops);
-}
bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
Instruction *InsertBefore, DIBuilder &Builder,
@@ -1313,9 +1270,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
auto *DIVar = DDI->getVariable();
auto *DIExpr = DDI->getExpression();
assert(DIVar && "Missing variable");
-
- DIExpr = prependDIExpr(Builder, DIExpr, Deref, Offset);
-
+ DIExpr = DIExpression::prepend(DIExpr, Deref, Offset);
// Insert llvm.dbg.declare immediately after the original alloca, and remove
// old llvm.dbg.declare.
Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore);
@@ -1348,7 +1303,7 @@ static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
if (Offset) {
SmallVector<uint64_t, 4> Ops;
Ops.push_back(dwarf::DW_OP_deref);
- appendOffset(Ops, Offset);
+ DIExpression::appendOffset(Ops, Offset);
Ops.append(DIExpr->elements_begin() + 1, DIExpr->elements_end());
DIExpr = Builder.createExpression(Ops);
}
@@ -1398,8 +1353,9 @@ void llvm::salvageDebugInfo(Instruction &I) {
auto *DIExpr = DVI->getExpression();
DIBuilder DIB(M, /*AllowUnresolved*/ false);
// GEP offsets are i32 and thus always fit into an int64_t.
- DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue(),
- WithStackValue);
+ DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref,
+ Offset.getSExtValue(),
+ DIExpression::WithStackValue);
DVI->setOperand(0, MDWrap(I.getOperand(0)));
DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
@@ -1411,7 +1367,7 @@ void llvm::salvageDebugInfo(Instruction &I) {
// Rewrite the load into DW_OP_deref.
auto *DIExpr = DVI->getExpression();
DIBuilder DIB(M, /*AllowUnresolved*/ false);
- DIExpr = prependDIExpr(DIB, DIExpr, WithDeref);
+ DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref);
DVI->setOperand(0, MDWrap(I.getOperand(0)));
DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
@@ -1520,7 +1476,7 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
II->setAttributes(CI->getAttributes());
// Make sure that anything using the call now uses the invoke! This also
- // updates the CallGraph if present, because it uses a WeakVH.
+ // updates the CallGraph if present, because it uses a WeakTrackingVH.
CI->replaceAllUsesWith(II);
// Delete the original call
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index e7ba19665d591..72c06aef80370 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -210,7 +210,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT,
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I);
++I;
- if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) {
+ if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) {
// This is a degenerate PHI already, don't modify it!
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
@@ -628,7 +628,7 @@ ReprocessLoop:
PHINode *PN;
for (BasicBlock::iterator I = L->getHeader()->begin();
(PN = dyn_cast<PHINode>(I++)); )
- if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) {
+ if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) {
if (SE) SE->forgetValue(PN);
if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) {
PN->replaceAllUsesWith(V);
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 43ab725b07691..4ab4d7949d233 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -757,7 +757,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
// Simplify any new induction variables in the partially unrolled loop.
if (SE && !CompletelyUnroll && Count > 1) {
- SmallVector<WeakVH, 16> DeadInsts;
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
simplifyLoopIVs(L, SE, DT, LI, DeadInsts);
// Aggressively clean up dead instructions that simplifyLoopIVs already
@@ -777,7 +777,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
Instruction *Inst = &*I++;
- if (Value *V = SimplifyInstruction(Inst, DL))
+ if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC}))
if (LI->replacementPreservesLCSSAForm(Inst, V))
Inst->replaceAllUsesWith(V);
if (isInstructionTriviallyDead(Inst))
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index dbe42c201dd4f..29d334f2968f1 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -237,3 +237,35 @@ void llvm::filterDeadComdatFunctions(
ComdatEntriesCovered.end();
});
}
+
+std::string llvm::getUniqueModuleId(Module *M) {
+ MD5 Md5;
+ bool ExportsSymbols = false;
+ auto AddGlobal = [&](GlobalValue &GV) {
+ if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
+ !GV.hasExternalLinkage())
+ return;
+ ExportsSymbols = true;
+ Md5.update(GV.getName());
+ Md5.update(ArrayRef<uint8_t>{0});
+ };
+
+ for (auto &F : *M)
+ AddGlobal(F);
+ for (auto &GV : M->globals())
+ AddGlobal(GV);
+ for (auto &GA : M->aliases())
+ AddGlobal(GA);
+ for (auto &IF : M->ifuncs())
+ AddGlobal(IF);
+
+ if (!ExportsSymbols)
+ return "";
+
+ MD5::MD5Result R;
+ Md5.final(R);
+
+ SmallString<32> Str;
+ MD5::stringifyResult(R, Str);
+ return ("$" + Str).str();
+}
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index a33b85c4ee69a..cdba982e6641f 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -225,10 +225,10 @@ struct PromoteMem2Reg {
std::vector<AllocaInst *> Allocas;
DominatorTree &DT;
DIBuilder DIB;
-
/// A cache of @llvm.assume intrinsics used by SimplifyInstruction.
AssumptionCache *AC;
+ const SimplifyQuery SQ;
/// Reverse mapping of Allocas.
DenseMap<AllocaInst *, unsigned> AllocaLookup;
@@ -270,7 +270,8 @@ public:
AssumptionCache *AC)
: Allocas(Allocas.begin(), Allocas.end()), DT(DT),
DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false),
- AC(AC) {}
+ AC(AC), SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(),
+ nullptr, &DT, AC) {}
void run();
@@ -673,8 +674,6 @@ void PromoteMem2Reg::run() {
A->eraseFromParent();
}
- const DataLayout &DL = F.getParent()->getDataLayout();
-
// Remove alloca's dbg.declare instrinsics from the function.
for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
@@ -699,7 +698,7 @@ void PromoteMem2Reg::run() {
PHINode *PN = I->second;
// If this PHI node merges one value and/or undefs, get the value.
- if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) {
+ if (Value *V = SimplifyInstruction(PN, SQ)) {
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
NewPhiNodes.erase(I++);
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index f86e97b6cc72f..7a3e8b9ae915d 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2231,7 +2231,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
}
// Check for trivial simplification.
- if (Value *V = SimplifyInstruction(N, DL)) {
+ if (Value *V = SimplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
if (!BBI->use_empty())
TranslateMap[&*BBI] = V;
if (!N->mayHaveSideEffects()) {
@@ -2307,7 +2307,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
PHINode *PN = cast<PHINode>(II++);
- if (Value *V = SimplifyInstruction(PN, DL)) {
+ if (Value *V = SimplifyInstruction(PN, {DL, PN})) {
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
continue;
@@ -3545,7 +3545,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
assert(VVal && "Should have a unique destination value");
ICI->setOperand(0, VVal);
- if (Value *V = SimplifyInstruction(ICI, DL)) {
+ if (Value *V = SimplifyInstruction(ICI, {DL, ICI})) {
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
}
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index a4cc6a031ad4c..02a5d3dbeadfb 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -51,13 +51,13 @@ namespace {
ScalarEvolution *SE;
DominatorTree *DT;
- SmallVectorImpl<WeakVH> &DeadInsts;
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts;
bool Changed;
public:
SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT,
- LoopInfo *LI,SmallVectorImpl<WeakVH> &Dead)
+ LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead)
: L(Loop), LI(LI), SE(SE), DT(DT), DeadInsts(Dead), Changed(false) {
assert(LI && "IV simplification requires LoopInfo");
}
@@ -701,7 +701,7 @@ void IVVisitor::anchor() { }
/// Simplify instructions that use this induction variable
/// by using ScalarEvolution to analyze the IV's recurrence.
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
- LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead,
+ LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead,
IVVisitor *V) {
SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Dead);
SIV.simplifyUsers(CurrIV, V);
@@ -711,7 +711,7 @@ bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,
/// Simplify users of induction variables within this
/// loop. This does not actually change or add IVs.
bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
- LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead) {
+ LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead) {
bool Changed = false;
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, Dead);
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index 27373427d4f7d..2509b5f22046b 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -54,8 +54,7 @@ static bool runImpl(Function &F, const SimplifyQuery &SQ,
// Don't waste time simplifying unused instructions.
if (!I->use_empty()) {
- if (Value *V =
- SimplifyInstruction(I, SQ.getWithInstruction(I), ORE)) {
+ if (Value *V = SimplifyInstruction(I, SQ, ORE)) {
// Mark all uses for resimplification next time round the loop.
for (User *U : I->users())
Next->insert(cast<Instruction>(U));
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5549444047088..f112c555205c3 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3899,11 +3899,13 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
}
/// \brief Check that the Values in the slice in VL array are still existent in
-/// the WeakVH array.
+/// the WeakTrackingVH array.
/// Vectorization of part of the VL array may cause later values in the VL array
-/// to become invalid. We track when this has happened in the WeakVH array.
-static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH,
- unsigned SliceBegin, unsigned SliceSize) {
+/// to become invalid. We track when this has happened in the WeakTrackingVH
+/// array.
+static bool hasValueBeenRAUWed(ArrayRef<Value *> VL,
+ ArrayRef<WeakTrackingVH> VH, unsigned SliceBegin,
+ unsigned SliceSize) {
VL = VL.slice(SliceBegin, SliceSize);
VH = VH.slice(SliceBegin, SliceSize);
return !std::equal(VL.begin(), VL.end(), VH.begin());
@@ -3921,7 +3923,7 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
return false;
// Keep track of values that were deleted by vectorizing in the loop below.
- SmallVector<WeakVH, 8> TrackValues(Chain.begin(), Chain.end());
+ SmallVector<WeakTrackingVH, 8> TrackValues(Chain.begin(), Chain.end());
bool Changed = false;
// Look for profitable vectorizable trees at all offsets, starting at zero.
@@ -4107,7 +4109,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
bool Changed = false;
// Keep track of values that were deleted by vectorizing in the loop below.
- SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());
+ SmallVector<WeakTrackingVH, 8> TrackValues(VL.begin(), VL.end());
unsigned NextInst = 0, MaxInst = VL.size();
for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF;
@@ -4734,7 +4736,7 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
namespace {
/// Tracks instructions and their children.
-class WeakVHWithLevel final : public CallbackVH {
+class WeakTrackingVHWithLevel final : public CallbackVH {
/// Operand index of the instruction currently being analyzed.
unsigned Level = 0;
/// Is this the instruction that should be vectorized, or are we now
@@ -4743,8 +4745,8 @@ class WeakVHWithLevel final : public CallbackVH {
bool IsInitial = true;
public:
- explicit WeakVHWithLevel() = default;
- WeakVHWithLevel(Value *V) : CallbackVH(V){};
+ explicit WeakTrackingVHWithLevel() = default;
+ WeakTrackingVHWithLevel(Value *V) : CallbackVH(V){};
/// Restart children analysis each time it is replaced by the new instruction.
void allUsesReplacedWith(Value *New) override {
setValPtr(New);
@@ -4771,7 +4773,7 @@ public:
cast<Instruction>(getValPtr())->getNumOperands() > Level);
return cast<Instruction>(getValPtr())->getOperand(Level++);
}
- virtual ~WeakVHWithLevel() = default;
+ virtual ~WeakTrackingVHWithLevel() = default;
};
} // namespace
@@ -4793,7 +4795,7 @@ static bool canBeVectorized(
if (Root->getParent() != BB)
return false;
- SmallVector<WeakVHWithLevel, 8> Stack(1, Root);
+ SmallVector<WeakTrackingVHWithLevel, 8> Stack(1, Root);
SmallSet<Value *, 8> VisitedInstrs;
bool Res = false;
while (!Stack.empty()) {
@@ -5069,7 +5071,8 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
SetVector<Value *> Candidates(GEPList.begin(), GEPList.end());
// Some of the candidates may have already been vectorized after we
- // initially collected them. If so, the WeakVHs will have nullified the
+ // initially collected them. If so, the WeakTrackingVHs will have
+ // nullified the
// values, so remove them from the set of candidates.
Candidates.remove(nullptr);