summaryrefslogtreecommitdiff
path: root/lib/Transforms
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-06-01 20:58:36 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-06-01 20:58:36 +0000
commitf382538d471e38a9b98f016c4caebd24c8d60b62 (patch)
treed30f3d58b1044b5355d50c17a6a96c6a0b35703a /lib/Transforms
parentee2f195dd3e40f49698ca4dc2666ec09c770e80d (diff)
Diffstat (limited to 'lib/Transforms')
-rw-r--r--lib/Transforms/Coroutines/CoroCleanup.cpp4
-rw-r--r--lib/Transforms/Coroutines/CoroEarly.cpp4
-rw-r--r--lib/Transforms/Coroutines/CoroElide.cpp4
-rw-r--r--lib/Transforms/Coroutines/CoroSplit.cpp4
-rw-r--r--lib/Transforms/IPO/PartialInlining.cpp23
-rw-r--r--lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp20
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp10
-rw-r--r--lib/Transforms/Instrumentation/PGOInstrumentation.cpp64
-rw-r--r--lib/Transforms/Instrumentation/SanitizerCoverage.cpp140
-rw-r--r--lib/Transforms/Scalar/CorrelatedValuePropagation.cpp9
-rw-r--r--lib/Transforms/Scalar/GVN.cpp164
-rw-r--r--lib/Transforms/Scalar/LowerExpectIntrinsic.cpp16
-rw-r--r--lib/Transforms/Scalar/NewGVN.cpp38
-rw-r--r--lib/Transforms/Utils/CodeExtractor.cpp83
-rw-r--r--lib/Transforms/Utils/PredicateInfo.cpp35
-rw-r--r--lib/Transforms/Utils/SimplifyLibCalls.cpp14
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp29
17 files changed, 309 insertions, 352 deletions
diff --git a/lib/Transforms/Coroutines/CoroCleanup.cpp b/lib/Transforms/Coroutines/CoroCleanup.cpp
index 5cf2a8c25d837..359876627fce1 100644
--- a/lib/Transforms/Coroutines/CoroCleanup.cpp
+++ b/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -101,7 +101,9 @@ namespace {
struct CoroCleanup : FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CoroCleanup() : FunctionPass(ID) {}
+ CoroCleanup() : FunctionPass(ID) {
+ initializeCoroCleanupPass(*PassRegistry::getPassRegistry());
+ }
std::unique_ptr<Lowerer> L;
diff --git a/lib/Transforms/Coroutines/CoroEarly.cpp b/lib/Transforms/Coroutines/CoroEarly.cpp
index b529891861651..ba05896af150c 100644
--- a/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -183,7 +183,9 @@ namespace {
struct CoroEarly : public FunctionPass {
static char ID; // Pass identification, replacement for typeid.
- CoroEarly() : FunctionPass(ID) {}
+ CoroEarly() : FunctionPass(ID) {
+ initializeCoroEarlyPass(*PassRegistry::getPassRegistry());
+ }
std::unique_ptr<Lowerer> L;
diff --git a/lib/Transforms/Coroutines/CoroElide.cpp b/lib/Transforms/Coroutines/CoroElide.cpp
index acb22449142b9..42fd6d7461459 100644
--- a/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/lib/Transforms/Coroutines/CoroElide.cpp
@@ -258,7 +258,9 @@ static bool replaceDevirtTrigger(Function &F) {
namespace {
struct CoroElide : FunctionPass {
static char ID;
- CoroElide() : FunctionPass(ID) {}
+ CoroElide() : FunctionPass(ID) {
+ initializeCoroElidePass(*PassRegistry::getPassRegistry());
+ }
std::unique_ptr<Lowerer> L;
diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp
index cd549e4be2826..613b4a7f03e98 100644
--- a/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -681,7 +681,9 @@ namespace {
struct CoroSplit : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
- CoroSplit() : CallGraphSCCPass(ID) {}
+ CoroSplit() : CallGraphSCCPass(ID) {
+ initializeCoroSplitPass(*PassRegistry::getPassRegistry());
+ }
bool Run = false;
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 4c417f1c55ebe..bc0967448cddf 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -652,12 +652,21 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
// only split block when necessary:
PHINode *FirstPhi = getFirstPHI(PreReturn);
unsigned NumPredsFromEntries = OI->ReturnBlockPreds.size();
+ auto IsTrivialPhi = [](PHINode *PN) -> Value * {
+ Value *CommonValue = PN->getIncomingValue(0);
+ if (all_of(PN->incoming_values(),
+ [&](Value *V) { return V == CommonValue; }))
+ return CommonValue;
+ return nullptr;
+ };
+
if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) {
NewReturnBlock = NewReturnBlock->splitBasicBlock(
NewReturnBlock->getFirstNonPHI()->getIterator());
BasicBlock::iterator I = PreReturn->begin();
Instruction *Ins = &NewReturnBlock->front();
+ SmallVector<Instruction *, 4> DeadPhis;
while (I != PreReturn->end()) {
PHINode *OldPhi = dyn_cast<PHINode>(I);
if (!OldPhi)
@@ -674,8 +683,22 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE);
OldPhi->removeIncomingValue(NewE);
}
+
+ // After incoming values splitting, the old phi may become trivial.
+ // Keeping the trivial phi can introduce definition inside the outline
+ // region which is live-out, causing necessary overhead (load, store
+ // arg passing etc).
+ if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
+ OldPhi->replaceAllUsesWith(OldPhiVal);
+ DeadPhis.push_back(OldPhi);
+ }
+
++I;
}
+
+ for (auto *DP : DeadPhis)
+ DP->eraseFromParent();
+
for (auto E : OI->ReturnBlockPreds) {
BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 659cb9df00a2c..9dede4cedd1de 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -6,14 +6,8 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This pass prepares a module containing type metadata for ThinLTO by splitting
-// it into regular and thin LTO parts if possible, and writing both parts to
-// a multi-module bitcode file. Modules that do not contain type metadata are
-// written unmodified as a single module.
-//
-//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
@@ -436,3 +430,15 @@ ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str,
raw_ostream *ThinLinkOS) {
return new WriteThinLTOBitcode(Str, ThinLinkOS);
}
+
+PreservedAnalyses
+llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ writeThinLTOBitcode(OS, ThinLinkOS,
+ [&FAM](Function &F) -> AAResults & {
+ return FAM.getResult<AAManager>(F);
+ },
+ M, &AM.getResult<ModuleSummaryIndexAnalysis>(M));
+ return PreservedAnalyses::all();
+}
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 92a38f26dde7a..b44499ec4be9f 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3838,24 +3838,24 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
// Mark any parameters that are known to be non-null with the nonnull
// attribute. This is helpful for inlining calls to functions with null
// checks on their arguments.
- SmallVector<unsigned, 4> Indices;
+ SmallVector<unsigned, 4> ArgNos;
unsigned ArgNo = 0;
for (Value *V : CS.args()) {
if (V->getType()->isPointerTy() &&
!CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
isKnownNonNullAt(V, CS.getInstruction(), &DT))
- Indices.push_back(ArgNo + AttributeList::FirstArgIndex);
+ ArgNos.push_back(ArgNo);
ArgNo++;
}
assert(ArgNo == CS.arg_size() && "sanity check");
- if (!Indices.empty()) {
+ if (!ArgNos.empty()) {
AttributeList AS = CS.getAttributes();
LLVMContext &Ctx = CS.getInstruction()->getContext();
- AS = AS.addAttribute(Ctx, Indices,
- Attribute::get(Ctx, Attribute::NonNull));
+ AS = AS.addParamAttribute(Ctx, ArgNos,
+ Attribute::get(Ctx, Attribute::NonNull));
CS.setAttributes(AS);
Changed = true;
}
diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 1e30dbf6b55a8..b2d95271479c3 100644
--- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -182,6 +182,14 @@ static cl::opt<bool>
cl::desc("Use this option to turn on/off "
"memory intrinsic size profiling."));
+// Emit branch probability as optimization remarks.
+static cl::opt<bool>
+ EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
+ cl::desc("When this option is on, the annotated "
+ "branch probability will be emitted as "
+ " optimization remarks: -Rpass-analysis="
+ "pgo-instr-use"));
+
// Command line option to turn on CFG dot dump after profile annotation.
// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
extern cl::opt<bool> PGOViewCounts;
@@ -192,6 +200,39 @@ extern cl::opt<std::string> ViewBlockFreqFuncName;
namespace {
+// Return a string describing the branch condition that can be
+// used in static branch probability heuristics:
+std::string getBranchCondString(Instruction *TI) {
+ BranchInst *BI = dyn_cast<BranchInst>(TI);
+ if (!BI || !BI->isConditional())
+ return std::string();
+
+ Value *Cond = BI->getCondition();
+ ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
+ if (!CI)
+ return std::string();
+
+ std::string result;
+ raw_string_ostream OS(result);
+ OS << CmpInst::getPredicateName(CI->getPredicate()) << "_";
+ CI->getOperand(0)->getType()->print(OS, true);
+
+ Value *RHS = CI->getOperand(1);
+ ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
+ if (CV) {
+ if (CV->isZero())
+ OS << "_Zero";
+ else if (CV->isOne())
+ OS << "_One";
+ else if (CV->isAllOnesValue())
+ OS << "_MinusOne";
+ else
+ OS << "_Const";
+ }
+ OS.flush();
+ return result;
+}
+
/// The select instruction visitor plays three roles specified
/// by the mode. In \c VM_counting mode, it simply counts the number of
/// select instructions. In \c VM_instrument mode, it inserts code to count
@@ -1424,6 +1465,29 @@ void setProfMetadata(Module *M, Instruction *TI, ArrayRef<uint64_t> EdgeCounts,
for (const auto &W : Weights) { dbgs() << W << " "; }
dbgs() << "\n";);
TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+ if (EmitBranchProbability) {
+ std::string BrCondStr = getBranchCondString(TI);
+ if (BrCondStr.empty())
+ return;
+
+ unsigned WSum =
+ std::accumulate(Weights.begin(), Weights.end(), 0,
+ [](unsigned w1, unsigned w2) { return w1 + w2; });
+ uint64_t TotalCount =
+ std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), 0,
+ [](uint64_t c1, uint64_t c2) { return c1 + c2; });
+ BranchProbability BP(Weights[0], WSum);
+ std::string BranchProbStr;
+ raw_string_ostream OS(BranchProbStr);
+ OS << BP;
+ OS << " (total count : " << TotalCount << ")";
+ OS.flush();
+ Function *F = TI->getParent()->getParent();
+ emitOptimizationRemarkAnalysis(
+ F->getContext(), "pgo-use-annot", *F, TI->getDebugLoc(),
+ Twine(BrCondStr) +
+ " is true with probability : " + Twine(BranchProbStr));
+ }
}
template <> struct GraphTraits<PGOUseFunc *> {
diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 300085eccb0c2..325b64cd8b436 100644
--- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -7,24 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// Coverage instrumentation that works with AddressSanitizer
-// and potentially with other Sanitizers.
-//
-// We create a Guard variable with the same linkage
-// as the function and inject this code into the entry block (SCK_Function)
-// or all blocks (SCK_BB):
-// if (Guard < 0) {
-// __sanitizer_cov(&Guard);
-// }
-// The accesses to Guard are atomic. The rest of the logic is
-// in __sanitizer_cov (it's fine to call it more than once).
-//
-// With SCK_Edge we also split critical edges this effectively
-// instrumenting all edges.
-//
-// This coverage implementation provides very limited data:
-// it only tells if a given function (block) was ever executed. No counters.
-// But for many use cases this is what we need and the added slowdown small.
+// Coverage instrumentation done on LLVM IR level, works with Sanitizers.
//
//===----------------------------------------------------------------------===//
@@ -56,9 +39,6 @@ using namespace llvm;
#define DEBUG_TYPE "sancov"
-static const char *const SanCovModuleInitName = "__sanitizer_cov_module_init";
-static const char *const SanCovName = "__sanitizer_cov";
-static const char *const SanCovWithCheckName = "__sanitizer_cov_with_check";
static const char *const SanCovTracePCIndirName =
"__sanitizer_cov_trace_pc_indir";
static const char *const SanCovTracePCName = "__sanitizer_cov_trace_pc";
@@ -84,12 +64,6 @@ static cl::opt<int> ClCoverageLevel(
"3: all blocks and critical edges"),
cl::Hidden, cl::init(0));
-static cl::opt<unsigned> ClCoverageBlockThreshold(
- "sanitizer-coverage-block-threshold",
- cl::desc("Use a callback with a guard check inside it if there are"
- " more than this number of blocks."),
- cl::Hidden, cl::init(0));
-
static cl::opt<bool> ClExperimentalTracePC("sanitizer-coverage-trace-pc",
cl::desc("Experimental pc tracing"),
cl::Hidden, cl::init(false));
@@ -151,6 +125,8 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) {
Options.TraceGep |= ClGEPTracing;
Options.TracePC |= ClExperimentalTracePC;
Options.TracePCGuard |= ClTracePCGuard;
+ if (!Options.TracePCGuard && !Options.TracePC)
+ Options.TracePCGuard = true; // TracePCGuard is default.
Options.NoPrune |= !ClPruneBlocks;
return Options;
}
@@ -184,18 +160,10 @@ private:
ArrayRef<Instruction *> SwitchTraceTargets);
bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks);
void CreateFunctionGuardArray(size_t NumGuards, Function &F);
- void SetNoSanitizeMetadata(Instruction *I);
- void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx,
- bool UseCalls);
- unsigned NumberOfInstrumentedBlocks() {
- return SanCovFunction->getNumUses() +
- SanCovWithCheckFunction->getNumUses();
- }
+ void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx);
StringRef getSanCovTracePCGuardSection() const;
StringRef getSanCovTracePCGuardSectionStart() const;
StringRef getSanCovTracePCGuardSectionEnd() const;
- Function *SanCovFunction;
- Function *SanCovWithCheckFunction;
Function *SanCovTracePCIndir;
Function *SanCovTracePC, *SanCovTracePCGuard;
Function *SanCovTraceCmpFunction[4];
@@ -209,7 +177,6 @@ private:
LLVMContext *C;
const DataLayout *DL;
- GlobalVariable *GuardArray;
GlobalVariable *FunctionGuardArray; // for trace-pc-guard.
bool HasSancovGuardsSection;
@@ -230,16 +197,11 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
IntptrPtrTy = PointerType::getUnqual(IntptrTy);
Type *VoidTy = Type::getVoidTy(*C);
IRBuilder<> IRB(*C);
- Type *Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty());
Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty());
Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
Int64Ty = IRB.getInt64Ty();
Int32Ty = IRB.getInt32Ty();
- SanCovFunction = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(SanCovName, VoidTy, Int32PtrTy));
- SanCovWithCheckFunction = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(SanCovWithCheckName, VoidTy, Int32PtrTy));
SanCovTracePCIndir = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy));
SanCovTraceCmpFunction[0] =
@@ -278,41 +240,10 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
SanCovTracePCGuard = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
SanCovTracePCGuardName, VoidTy, Int32PtrTy));
- // At this point we create a dummy array of guards because we don't
- // know how many elements we will need.
- Type *Int32Ty = IRB.getInt32Ty();
-
- if (!Options.TracePCGuard)
- GuardArray =
- new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
- nullptr, "__sancov_gen_cov_tmp");
-
for (auto &F : M)
runOnFunction(F);
- auto N = NumberOfInstrumentedBlocks();
-
- GlobalVariable *RealGuardArray = nullptr;
- if (!Options.TracePCGuard) {
- // Now we know how many elements we need. Create an array of guards
- // with one extra element at the beginning for the size.
- Type *Int32ArrayNTy = ArrayType::get(Int32Ty, N + 1);
- RealGuardArray = new GlobalVariable(
- M, Int32ArrayNTy, false, GlobalValue::PrivateLinkage,
- Constant::getNullValue(Int32ArrayNTy), "__sancov_gen_cov");
-
- // Replace the dummy array with the real one.
- GuardArray->replaceAllUsesWith(
- IRB.CreatePointerCast(RealGuardArray, Int32PtrTy));
- GuardArray->eraseFromParent();
- }
-
// Create variable for module (compilation unit) name
- Constant *ModNameStrConst =
- ConstantDataArray::getString(M.getContext(), M.getName(), true);
- GlobalVariable *ModuleName = new GlobalVariable(
- M, ModNameStrConst->getType(), true, GlobalValue::PrivateLinkage,
- ModNameStrConst, "__sancov_gen_modname");
if (Options.TracePCGuard) {
if (HasSancovGuardsSection) {
Function *CtorFunc;
@@ -339,18 +270,7 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
}
}
- } else if (!Options.TracePC) {
- Function *CtorFunc;
- std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
- M, SanCovModuleCtorName, SanCovModuleInitName,
- {Int32PtrTy, IntptrTy, Int8PtrTy, Int8PtrTy},
- {IRB.CreatePointerCast(RealGuardArray, Int32PtrTy),
- ConstantInt::get(IntptrTy, N), Constant::getNullValue(Int8PtrTy),
- IRB.CreatePointerCast(ModuleName, Int8PtrTy)});
-
- appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
}
-
return true;
}
@@ -494,13 +414,12 @@ bool SanitizerCoverageModule::InjectCoverage(Function &F,
return false;
case SanitizerCoverageOptions::SCK_Function:
CreateFunctionGuardArray(1, F);
- InjectCoverageAtBlock(F, F.getEntryBlock(), 0, false);
+ InjectCoverageAtBlock(F, F.getEntryBlock(), 0);
return true;
default: {
- bool UseCalls = ClCoverageBlockThreshold < AllBlocks.size();
CreateFunctionGuardArray(AllBlocks.size(), F);
for (size_t i = 0, N = AllBlocks.size(); i < N; i++)
- InjectCoverageAtBlock(F, *AllBlocks[i], i, UseCalls);
+ InjectCoverageAtBlock(F, *AllBlocks[i], i);
return true;
}
}
@@ -517,8 +436,7 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls(
Function &F, ArrayRef<Instruction *> IndirCalls) {
if (IndirCalls.empty())
return;
- if (!Options.TracePC && !Options.TracePCGuard)
- return;
+ assert(Options.TracePC || Options.TracePCGuard);
for (auto I : IndirCalls) {
IRBuilder<> IRB(I);
CallSite CS(I);
@@ -625,13 +543,8 @@ void SanitizerCoverageModule::InjectTraceForCmp(
}
}
-void SanitizerCoverageModule::SetNoSanitizeMetadata(Instruction *I) {
- I->setMetadata(I->getModule()->getMDKindID("nosanitize"),
- MDNode::get(*C, None));
-}
-
void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
- size_t Idx, bool UseCalls) {
+ size_t Idx) {
BasicBlock::iterator IP = BB.getFirstInsertionPt();
bool IsEntryBB = &BB == &F.getEntryBlock();
DebugLoc EntryLoc;
@@ -651,47 +564,14 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
if (Options.TracePC) {
IRB.CreateCall(SanCovTracePC); // gets the PC using GET_CALLER_PC.
IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
- } else if (Options.TracePCGuard) {
+ } else {
+ assert(Options.TracePCGuard);
auto GuardPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy),
ConstantInt::get(IntptrTy, Idx * 4)),
Int32PtrTy);
- if (!UseCalls) {
- auto GuardLoad = IRB.CreateLoad(GuardPtr);
- GuardLoad->setAtomic(AtomicOrdering::Monotonic);
- GuardLoad->setAlignment(8);
- SetNoSanitizeMetadata(GuardLoad); // Don't instrument with e.g. asan.
- auto Cmp = IRB.CreateICmpNE(
- GuardLoad, Constant::getNullValue(GuardLoad->getType()));
- auto Ins = SplitBlockAndInsertIfThen(
- Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
- IRB.SetInsertPoint(Ins);
- IRB.SetCurrentDebugLocation(EntryLoc);
- }
IRB.CreateCall(SanCovTracePCGuard, GuardPtr);
IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
- } else {
- Value *GuardP = IRB.CreateAdd(
- IRB.CreatePointerCast(GuardArray, IntptrTy),
- ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4));
- GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy);
- if (UseCalls) {
- IRB.CreateCall(SanCovWithCheckFunction, GuardP);
- } else {
- LoadInst *Load = IRB.CreateLoad(GuardP);
- Load->setAtomic(AtomicOrdering::Monotonic);
- Load->setAlignment(4);
- SetNoSanitizeMetadata(Load);
- Value *Cmp =
- IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load);
- Instruction *Ins = SplitBlockAndInsertIfThen(
- Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
- IRB.SetInsertPoint(Ins);
- IRB.SetCurrentDebugLocation(EntryLoc);
- // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC.
- IRB.CreateCall(SanCovFunction, GuardP);
- IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
- }
}
}
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index ee493a8ec7e18..7b625b9b136ec 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -305,7 +305,7 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) {
/// Infer nonnull attributes for the arguments at the specified callsite.
static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
- SmallVector<unsigned, 4> Indices;
+ SmallVector<unsigned, 4> ArgNos;
unsigned ArgNo = 0;
for (Value *V : CS.args()) {
@@ -318,18 +318,19 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
LVI->getPredicateAt(ICmpInst::ICMP_EQ, V,
ConstantPointerNull::get(Type),
CS.getInstruction()) == LazyValueInfo::False)
- Indices.push_back(ArgNo + AttributeList::FirstArgIndex);
+ ArgNos.push_back(ArgNo);
ArgNo++;
}
assert(ArgNo == CS.arg_size() && "sanity check");
- if (Indices.empty())
+ if (ArgNos.empty())
return false;
AttributeList AS = CS.getAttributes();
LLVMContext &Ctx = CS.getInstruction()->getContext();
- AS = AS.addAttribute(Ctx, Indices, Attribute::get(Ctx, Attribute::NonNull));
+ AS = AS.addParamAttribute(Ctx, ArgNos,
+ Attribute::get(Ctx, Attribute::NonNull));
CS.setAttributes(AS);
return true;
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 0d6e0538261d0..0490d93f64553 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -80,10 +80,9 @@ MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
struct llvm::GVN::Expression {
uint32_t opcode;
Type *type;
- bool commutative;
SmallVector<uint32_t, 4> varargs;
- Expression(uint32_t o = ~2U) : opcode(o), commutative(false) {}
+ Expression(uint32_t o = ~2U) : opcode(o) {}
bool operator==(const Expression &other) const {
if (opcode != other.opcode)
@@ -247,7 +246,6 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
if (e.varargs[0] > e.varargs[1])
std::swap(e.varargs[0], e.varargs[1]);
- e.commutative = true;
}
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
@@ -258,7 +256,6 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
e.opcode = (C->getOpcode() << 8) | Predicate;
- e.commutative = true;
} else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
II != IE; ++II)
@@ -284,7 +281,6 @@ GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode,
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
e.opcode = (Opcode << 8) | Predicate;
- e.commutative = true;
return e;
}
@@ -352,25 +348,25 @@ GVN::ValueTable::~ValueTable() = default;
/// add - Insert a value into the table with a specified value number.
void GVN::ValueTable::add(Value *V, uint32_t num) {
valueNumbering.insert(std::make_pair(V, num));
- if (PHINode *PN = dyn_cast<PHINode>(V))
- NumberingPhi[num] = PN;
}
uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
if (AA->doesNotAccessMemory(C)) {
Expression exp = createExpr(C);
- uint32_t e = assignExpNewValueNum(exp).first;
+ uint32_t &e = expressionNumbering[exp];
+ if (!e) e = nextValueNumber++;
valueNumbering[C] = e;
return e;
} else if (AA->onlyReadsMemory(C)) {
Expression exp = createExpr(C);
- auto ValNum = assignExpNewValueNum(exp);
- if (ValNum.second) {
- valueNumbering[C] = ValNum.first;
- return ValNum.first;
+ uint32_t &e = expressionNumbering[exp];
+ if (!e) {
+ e = nextValueNumber++;
+ valueNumbering[C] = e;
+ return e;
}
if (!MD) {
- uint32_t e = assignExpNewValueNum(exp).first;
+ e = nextValueNumber++;
valueNumbering[C] = e;
return e;
}
@@ -526,29 +522,23 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
case Instruction::ExtractValue:
exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
break;
- case Instruction::PHI:
- valueNumbering[V] = nextValueNumber;
- NumberingPhi[nextValueNumber] = cast<PHINode>(V);
- return nextValueNumber++;
default:
valueNumbering[V] = nextValueNumber;
return nextValueNumber++;
}
- uint32_t e = assignExpNewValueNum(exp).first;
+ uint32_t& e = expressionNumbering[exp];
+ if (!e) e = nextValueNumber++;
valueNumbering[V] = e;
return e;
}
/// Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
-uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const {
+uint32_t GVN::ValueTable::lookup(Value *V) const {
DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
- if (Verify) {
- assert(VI != valueNumbering.end() && "Value not numbered?");
- return VI->second;
- }
- return (VI != valueNumbering.end()) ? VI->second : 0;
+ assert(VI != valueNumbering.end() && "Value not numbered?");
+ return VI->second;
}
/// Returns the value number of the given comparison,
@@ -559,29 +549,21 @@ uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode,
CmpInst::Predicate Predicate,
Value *LHS, Value *RHS) {
Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
- return assignExpNewValueNum(exp).first;
+ uint32_t& e = expressionNumbering[exp];
+ if (!e) e = nextValueNumber++;
+ return e;
}
/// Remove all entries from the ValueTable.
void GVN::ValueTable::clear() {
valueNumbering.clear();
expressionNumbering.clear();
- NumberingPhi.clear();
- PhiTranslateTable.clear();
- BlockRPONumber.clear();
nextValueNumber = 1;
- Expressions.clear();
- ExprIdx.clear();
- nextExprNumber = 0;
}
/// Remove a value from the value numbering.
void GVN::ValueTable::erase(Value *V) {
- uint32_t Num = valueNumbering.lookup(V);
valueNumbering.erase(V);
- // If V is PHINode, V <--> value number is an one-to-one mapping.
- if (isa<PHINode>(V))
- NumberingPhi.erase(Num);
}
/// verifyRemoved - Verify that the value is removed from all internal data
@@ -1469,104 +1451,6 @@ bool GVN::processLoad(LoadInst *L) {
return false;
}
-/// Return a pair the first field showing the value number of \p Exp and the
-/// second field showing whether it is a value number newly created.
-std::pair<uint32_t, bool>
-GVN::ValueTable::assignExpNewValueNum(Expression &Exp) {
- uint32_t &e = expressionNumbering[Exp];
- bool CreateNewValNum = !e;
- if (CreateNewValNum) {
- Expressions.push_back(Exp);
- if (ExprIdx.size() < nextValueNumber + 1)
- ExprIdx.resize(nextValueNumber * 2);
- e = nextValueNumber;
- ExprIdx[nextValueNumber++] = nextExprNumber++;
- }
- return {e, CreateNewValNum};
-}
-
-void GVN::ValueTable::assignBlockRPONumber(Function &F) {
- uint32_t NextBlockNumber = 1;
- ReversePostOrderTraversal<Function *> RPOT(&F);
- for (BasicBlock *BB : RPOT)
- BlockRPONumber[BB] = NextBlockNumber++;
-}
-
-/// Return whether all the values related with the same \p num are
-/// defined in \p BB.
-bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
- GVN &Gvn) {
- LeaderTableEntry *Vals = &Gvn.LeaderTable[Num];
- while (Vals && Vals->BB == BB)
- Vals = Vals->Next;
- return !Vals;
-}
-
-/// Wrap phiTranslateImpl to provide caching functionality.
-uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred,
- const BasicBlock *PhiBlock, uint32_t Num,
- GVN &Gvn) {
- auto FindRes = PhiTranslateTable.find({Num, Pred});
- if (FindRes != PhiTranslateTable.end())
- return FindRes->second;
- uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn);
- PhiTranslateTable.insert({{Num, Pred}, NewNum});
- return NewNum;
-}
-
-/// Translate value number \p Num using phis, so that it has the values of
-/// the phis in BB.
-uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
- const BasicBlock *PhiBlock,
- uint32_t Num, GVN &Gvn) {
- if (PHINode *PN = NumberingPhi[Num]) {
- if (BlockRPONumber[Pred] >= BlockRPONumber[PhiBlock])
- return Num;
- for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
- if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred)
- if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false))
- return TransVal;
- }
- return Num;
- }
-
- // If there is any value related with Num is defined in a BB other than
- // PhiBlock, it cannot depend on a phi in PhiBlock without going through
- // a backedge. We can do an early exit in that case to save compile time.
- if (!areAllValsInBB(Num, PhiBlock, Gvn))
- return Num;
-
- if (ExprIdx[Num] == 0 || Num >= ExprIdx.size())
- return Num;
- Expression Exp = Expressions[ExprIdx[Num]];
-
- for (unsigned i = 0; i < Exp.varargs.size(); i++) {
- // For InsertValue and ExtractValue, some varargs are index numbers
- // instead of value numbers. Those index numbers should not be
- // translated.
- if ((i > 1 && Exp.opcode == Instruction::InsertValue) ||
- (i > 0 && Exp.opcode == Instruction::ExtractValue))
- continue;
- Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn);
- }
-
- if (Exp.commutative) {
- assert(Exp.varargs.size() == 2 && "Unsupported commutative expression!");
- if (Exp.varargs[0] > Exp.varargs[1]) {
- std::swap(Exp.varargs[0], Exp.varargs[1]);
- uint32_t Opcode = Exp.opcode >> 8;
- if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)
- Exp.opcode = (Opcode << 8) |
- CmpInst::getSwappedPredicate(
- static_cast<CmpInst::Predicate>(Exp.opcode & 255));
- }
- }
-
- if (uint32_t NewNum = expressionNumbering[Exp])
- return NewNum;
- return Num;
-}
-
// In order to find a leader for a given value number at a
// specific basic block, we first obtain the list of all Values for that number,
// and then scan the list to find one whose block dominates the block in
@@ -1972,7 +1856,6 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
// Fabricate val-num for dead-code in order to suppress assertion in
// performPRE().
assignValNumForDeadCode();
- VN.assignBlockRPONumber(F);
bool PREChanged = true;
while (PREChanged) {
PREChanged = performPRE(F);
@@ -2062,9 +1945,7 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
success = false;
break;
}
- uint32_t TValNo =
- VN.phiTranslate(Pred, Instr->getParent(), VN.lookup(Op), *this);
- if (Value *V = findLeader(Pred, TValNo)) {
+ if (Value *V = findLeader(Pred, VN.lookup(Op))) {
Instr->setOperand(i, V);
} else {
success = false;
@@ -2081,12 +1962,10 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
Instr->insertBefore(Pred->getTerminator());
Instr->setName(Instr->getName() + ".pre");
Instr->setDebugLoc(Instr->getDebugLoc());
-
- unsigned Num = VN.lookupOrAdd(Instr);
- VN.add(Instr, Num);
+ VN.add(Instr, ValNo);
// Update the availability map to include the new instruction.
- addToLeaderTable(Num, Instr, Pred);
+ addToLeaderTable(ValNo, Instr, Pred);
return true;
}
@@ -2135,8 +2014,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
break;
}
- uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this);
- Value *predV = findLeader(P, TValNo);
+ Value *predV = findLeader(P, ValNo);
if (!predV) {
predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
PREPred = P;
diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index a143b9a3c645f..930696b036c00 100644
--- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -98,11 +98,20 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
CallInst *CI;
ICmpInst *CmpI = dyn_cast<ICmpInst>(BSI.getCondition());
+ CmpInst::Predicate Predicate;
+ uint64_t ValueComparedTo = 0;
if (!CmpI) {
CI = dyn_cast<CallInst>(BSI.getCondition());
+ Predicate = CmpInst::ICMP_NE;
+ ValueComparedTo = 0;
} else {
- if (CmpI->getPredicate() != CmpInst::ICMP_NE)
+ Predicate = CmpI->getPredicate();
+ if (Predicate != CmpInst::ICMP_NE && Predicate != CmpInst::ICMP_EQ)
return false;
+ ConstantInt *CmpConstOperand = dyn_cast<ConstantInt>(CmpI->getOperand(1));
+ if (!CmpConstOperand)
+ return false;
+ ValueComparedTo = CmpConstOperand->getZExtValue();
CI = dyn_cast<CallInst>(CmpI->getOperand(0));
}
@@ -121,9 +130,8 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
MDBuilder MDB(CI->getContext());
MDNode *Node;
- // If expect value is equal to 1 it means that we are more likely to take
- // branch 0, in other case more likely is branch 1.
- if (ExpectedValue->isOne())
+ if ((ExpectedValue->getZExtValue() == ValueComparedTo) ==
+ (Predicate == CmpInst::ICMP_EQ))
Node = MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight);
else
Node = MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight);
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 5e9f40019ce8c..27809f5b6f661 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -613,7 +613,7 @@ private:
return CClass;
}
void initializeCongruenceClasses(Function &F);
- const Expression *makePossiblePhiOfOps(Instruction *, bool,
+ const Expression *makePossiblePhiOfOps(Instruction *,
SmallPtrSetImpl<Value *> &);
void addPhiOfOps(PHINode *Op, BasicBlock *BB, Instruction *ExistingValue);
@@ -1937,7 +1937,8 @@ void NewGVN::touchAndErase(Map &M, const KeyType &Key) {
}
void NewGVN::addAdditionalUsers(Value *To, Value *User) const {
- AdditionalUsers[To].insert(User);
+ if (isa<Instruction>(To))
+ AdditionalUsers[To].insert(User);
}
void NewGVN::markUsersTouched(Value *V) {
@@ -2423,7 +2424,7 @@ static bool okayForPHIOfOps(const Instruction *I) {
// When we see an instruction that is an op of phis, generate the equivalent phi
// of ops form.
const Expression *
-NewGVN::makePossiblePhiOfOps(Instruction *I, bool HasBackedge,
+NewGVN::makePossiblePhiOfOps(Instruction *I,
SmallPtrSetImpl<Value *> &Visited) {
if (!okayForPHIOfOps(I))
return nullptr;
@@ -2438,24 +2439,6 @@ NewGVN::makePossiblePhiOfOps(Instruction *I, bool HasBackedge,
return nullptr;
unsigned IDFSNum = InstrToDFSNum(I);
- // Pretty much all of the instructions we can convert to phi of ops over a
- // backedge that are adds, are really induction variables, and those are
- // pretty much pointless to convert. This is very coarse-grained for a
- // test, so if we do find some value, we can change it later.
- // But otherwise, what can happen is we convert the induction variable from
- //
- // i = phi (0, tmp)
- // tmp = i + 1
- //
- // to
- // i = phi (0, tmpphi)
- // tmpphi = phi(1, tmpphi+1)
- //
- // Which we don't want to happen. We could just avoid this for all non-cycle
- // free phis, and we made go that route.
- if (HasBackedge && I->getOpcode() == Instruction::Add)
- return nullptr;
-
SmallPtrSet<const Value *, 8> ProcessedPHIs;
// TODO: We don't do phi translation on memory accesses because it's
// complicated. For a load, we'd need to be able to simulate a new memoryuse,
@@ -2470,6 +2453,16 @@ NewGVN::makePossiblePhiOfOps(Instruction *I, bool HasBackedge,
// Convert op of phis to phi of ops
for (auto &Op : I->operands()) {
+ // TODO: We can't handle expressions that must be recursively translated
+ // IE
+ // a = phi (b, c)
+ // f = use a
+ // g = f + phi of something
+ // To properly make a phi of ops for g, we'd have to properly translate and
+ // use the instruction for f. We should add this by splitting out the
+ // instruction creation we do below.
+ if (isa<Instruction>(Op) && PHINodeUses.count(cast<Instruction>(Op)))
+ return nullptr;
if (!isa<PHINode>(Op))
continue;
auto *OpPHI = cast<PHINode>(Op);
@@ -2782,8 +2775,7 @@ void NewGVN::valueNumberInstruction(Instruction *I) {
// Make a phi of ops if necessary
if (Symbolized && !isa<ConstantExpression>(Symbolized) &&
!isa<VariableExpression>(Symbolized) && PHINodeUses.count(I)) {
- // FIXME: Backedge argument
- auto *PHIE = makePossiblePhiOfOps(I, false, Visited);
+ auto *PHIE = makePossiblePhiOfOps(I, Visited);
if (PHIE)
Symbolized = PHIE;
}
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index ed72099ec3ed6..24d28a6c28311 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
@@ -141,16 +142,77 @@ static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) {
return false;
}
-void CodeExtractor::findInputsOutputs(ValueSet &Inputs,
- ValueSet &Outputs) const {
+void CodeExtractor::findAllocas(ValueSet &SinkCands) const {
+ Function *Func = (*Blocks.begin())->getParent();
+ for (BasicBlock &BB : *Func) {
+ if (Blocks.count(&BB))
+ continue;
+ for (Instruction &II : BB) {
+ auto *AI = dyn_cast<AllocaInst>(&II);
+ if (!AI)
+ continue;
+
+ // Returns true if matching life time markers are found within
+ // the outlined region.
+ auto GetLifeTimeMarkers = [&](Instruction *Addr) {
+ Instruction *LifeStart = nullptr, *LifeEnd = nullptr;
+ for (User *U : Addr->users()) {
+ if (!definedInRegion(Blocks, U))
+ return false;
+
+ IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
+ if (IntrInst) {
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start)
+ LifeStart = IntrInst;
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
+ LifeEnd = IntrInst;
+ }
+ }
+ return LifeStart && LifeEnd;
+ };
+
+ if (GetLifeTimeMarkers(AI)) {
+ SinkCands.insert(AI);
+ continue;
+ }
+
+ // Follow the bitcast:
+ Instruction *MarkerAddr = nullptr;
+ for (User *U : AI->users()) {
+ if (U->stripPointerCasts() == AI) {
+ Instruction *Bitcast = cast<Instruction>(U);
+ if (GetLifeTimeMarkers(Bitcast)) {
+ MarkerAddr = Bitcast;
+ continue;
+ }
+ }
+ if (!definedInRegion(Blocks, U)) {
+ MarkerAddr = nullptr;
+ break;
+ }
+ }
+ if (MarkerAddr) {
+ if (!definedInRegion(Blocks, MarkerAddr))
+ SinkCands.insert(MarkerAddr);
+ SinkCands.insert(AI);
+ }
+ }
+ }
+}
+
+void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
+ const ValueSet &SinkCands) const {
+
for (BasicBlock *BB : Blocks) {
// If a used value is defined outside the region, it's an input. If an
// instruction is used outside the region, it's an output.
for (Instruction &II : *BB) {
for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE;
- ++OI)
- if (definedInCaller(Blocks, *OI))
- Inputs.insert(*OI);
+ ++OI) {
+ Value *V = *OI;
+ if (!SinkCands.count(V) && definedInCaller(Blocks, V))
+ Inputs.insert(V);
+ }
for (User *U : II.users())
if (!definedInRegion(Blocks, U)) {
@@ -718,7 +780,7 @@ Function *CodeExtractor::extractCodeRegion() {
if (!isEligible())
return nullptr;
- ValueSet inputs, outputs;
+ ValueSet inputs, outputs, SinkingCands;
// Assumption: this is a single-entry code region, and the header is the first
// block in the region.
@@ -757,8 +819,15 @@ Function *CodeExtractor::extractCodeRegion() {
"newFuncRoot");
newFuncRoot->getInstList().push_back(BranchInst::Create(header));
+ findAllocas(SinkingCands);
+
// Find inputs to, outputs from the code region.
- findInputsOutputs(inputs, outputs);
+ findInputsOutputs(inputs, outputs, SinkingCands);
+
+ // Now sink all instructions which only have non-phi uses inside the region
+ for (auto *II : SinkingCands)
+ cast<Instruction>(II)->moveBefore(*newFuncRoot,
+ newFuncRoot->getFirstInsertionPt());
// Calculate the exit blocks for the extracted region and the total exit
// weights for each of those blocks.
diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp
index 8877aeafecdec..9e71cba4f1b7a 100644
--- a/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/lib/Transforms/Utils/PredicateInfo.cpp
@@ -541,7 +541,40 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,
//
// TODO: Use this algorithm to perform fast single-variable renaming in
// promotememtoreg and memoryssa.
-void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpsToRename) {
+void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
+ // Sort OpsToRename since we are going to iterate it.
+ SmallVector<Value *, 8> OpsToRename(OpSet.begin(), OpSet.end());
+ std::sort(OpsToRename.begin(), OpsToRename.end(), [&](const Value *A,
+ const Value *B) {
+ auto *ArgA = dyn_cast_or_null<Argument>(A);
+ auto *ArgB = dyn_cast_or_null<Argument>(B);
+
+ // If A and B are args, order them based on their arg no.
+ if (ArgA && !ArgB)
+ return true;
+ if (ArgB && !ArgA)
+ return false;
+ if (ArgA && ArgB)
+ return ArgA->getArgNo() < ArgB->getArgNo();
+
+ // Else, A are B are instructions.
+ // If they belong to different BBs, order them by the dominance of BBs.
+ auto *AInst = cast<Instruction>(A);
+ auto *BInst = cast<Instruction>(B);
+ if (AInst->getParent() != BInst->getParent())
+ return DT.dominates(AInst->getParent(), BInst->getParent());
+
+ // Else, A and B belong to the same BB.
+ // Order A and B by their dominance.
+ auto *BB = AInst->getParent();
+ auto LookupResult = OBBMap.find(BB);
+ if (LookupResult != OBBMap.end())
+ return LookupResult->second->dominates(AInst, BInst);
+
+ auto Result = OBBMap.insert({BB, make_unique<OrderedBasicBlock>(BB)});
+ return Result.first->second->dominates(AInst, BInst);
+ });
+
ValueDFS_Compare Compare(OBBMap);
// Compute liveness, and rename in O(uses) per Op.
for (auto *Op : OpsToRename) {
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 49effda5d833c..cc6c47e8f978d 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -85,20 +85,6 @@ static bool isCallingConvCCompatible(CallInst *CI) {
return false;
}
-/// Return true if it only matters that the value is equal or not-equal to zero.
-static bool isOnlyUsedInZeroEqualityComparison(Value *V) {
- for (User *U : V->users()) {
- if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
- if (IC->isEquality())
- if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
- if (C->isNullValue())
- continue;
- // Unknown instruction.
- return false;
- }
- return true;
-}
-
/// Return true if it is only used in equality comparisons with With.
static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
for (User *U : V->users()) {
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2b83b8426d147..8b9a64c220ccd 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7170,10 +7170,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
Type *VectorTy;
unsigned C = getInstructionCost(I, VF, VectorTy);
- // Note: Even if all instructions are scalarized, return true if any memory
- // accesses appear in the loop to get benefits from address folding etc.
bool TypeNotScalarized =
- VF > 1 && !VectorTy->isVoidTy() && TTI.getNumberOfParts(VectorTy) < VF;
+ VF > 1 && VectorTy->isVectorTy() && TTI.getNumberOfParts(VectorTy) < VF;
return VectorizationCostTy(C, TypeNotScalarized);
}
@@ -7312,7 +7310,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
Type *RetTy = I->getType();
if (canTruncateToMinimalBitwidth(I, VF))
RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
- VectorTy = ToVectorTy(RetTy, VF);
+ VectorTy = isScalarAfterVectorization(I, VF) ? RetTy : ToVectorTy(RetTy, VF);
auto SE = PSE.getSE();
// TODO: We need to estimate the cost of intrinsic calls.
@@ -7445,9 +7443,10 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
} else if (Legal->isUniform(Op2)) {
Op2VK = TargetTransformInfo::OK_UniformValue;
}
- SmallVector<const Value *, 4> Operands(I->operand_values());
- return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK,
- Op2VK, Op1VP, Op2VP, Operands);
+ SmallVector<const Value *, 4> Operands(I->operand_values());
+ unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
+ return N * TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK,
+ Op2VK, Op1VP, Op2VP, Operands);
}
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
@@ -7470,7 +7469,15 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
}
case Instruction::Store:
case Instruction::Load: {
- VectorTy = ToVectorTy(getMemInstValueType(I), VF);
+ unsigned Width = VF;
+ if (Width > 1) {
+ InstWidening Decision = getWideningDecision(I, Width);
+ assert(Decision != CM_Unknown &&
+ "CM decision should be taken at this point");
+ if (Decision == CM_Scalarize)
+ Width = 1;
+ }
+ VectorTy = ToVectorTy(getMemInstValueType(I), Width);
return getMemoryInstructionCost(I, VF);
}
case Instruction::ZExt:
@@ -7495,7 +7502,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
}
Type *SrcScalarTy = I->getOperand(0)->getType();
- Type *SrcVecTy = ToVectorTy(SrcScalarTy, VF);
+ Type *SrcVecTy =
+ VectorTy->isVectorTy() ? ToVectorTy(SrcScalarTy, VF) : SrcScalarTy;
if (canTruncateToMinimalBitwidth(I, VF)) {
// This cast is going to be shrunk. This may remove the cast or it might
// turn it into slightly different cast. For example, if MinBW == 16,
@@ -7515,7 +7523,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
}
}
- return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy, I);
+ unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
+ return N * TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy, I);
}
case Instruction::Call: {
bool NeedToScalarize;