Diffstat (limited to 'lib/Transforms')
-rw-r--r--  lib/Transforms/Coroutines/CoroSplit.cpp              |   2
-rw-r--r--  lib/Transforms/Coroutines/Coroutines.cpp             |  12
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp                 |   4
-rw-r--r--  lib/Transforms/IPO/FunctionImport.cpp                | 107
-rw-r--r--  lib/Transforms/IPO/LowerTypeTests.cpp                |   3
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp               |  66
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp            |   2
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp   |  11
-rw-r--r--  lib/Transforms/Instrumentation/MemorySanitizer.cpp   |   1
-rw-r--r--  lib/Transforms/Instrumentation/SanitizerCoverage.cpp | 181
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp             |   1
-rw-r--r--  lib/Transforms/Scalar/LowerExpectIntrinsic.cpp       | 162
-rw-r--r--  lib/Transforms/Scalar/RewriteStatepointsForGC.cpp    |  10
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp                       |   7
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp               |  71
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp           |   2
-rw-r--r--  lib/Transforms/Vectorize/SLPVectorizer.cpp           | 151
17 files changed, 521 insertions, 272 deletions
diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp
index 613b4a7f03e98..626a891f65c6a 100644
--- a/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -228,7 +228,7 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape,
SmallVector<ReturnInst *, 4> Returns;
- CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/false, Returns);
+ CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/true, Returns);
// Remove old returns.
for (ReturnInst *Return : Returns)
diff --git a/lib/Transforms/Coroutines/Coroutines.cpp b/lib/Transforms/Coroutines/Coroutines.cpp
index ea48043f9381f..44e1f9b404eda 100644
--- a/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/lib/Transforms/Coroutines/Coroutines.cpp
@@ -218,6 +218,8 @@ void coro::Shape::buildFrom(Function &F) {
size_t FinalSuspendIndex = 0;
clear(*this);
SmallVector<CoroFrameInst *, 8> CoroFrames;
+ SmallVector<CoroSaveInst *, 2> UnusedCoroSaves;
+
for (Instruction &I : instructions(F)) {
if (auto II = dyn_cast<IntrinsicInst>(&I)) {
switch (II->getIntrinsicID()) {
@@ -229,6 +231,12 @@ void coro::Shape::buildFrom(Function &F) {
case Intrinsic::coro_frame:
CoroFrames.push_back(cast<CoroFrameInst>(II));
break;
+ case Intrinsic::coro_save:
+ // After optimizations, coro_suspends using this coro_save might have
+ // been removed; remember orphaned coro_saves so we can remove them later.
+ if (II->use_empty())
+ UnusedCoroSaves.push_back(cast<CoroSaveInst>(II));
+ break;
case Intrinsic::coro_suspend:
CoroSuspends.push_back(cast<CoroSuspendInst>(II));
if (CoroSuspends.back()->isFinal()) {
@@ -311,4 +319,8 @@ void coro::Shape::buildFrom(Function &F) {
if (HasFinalSuspend &&
FinalSuspendIndex != CoroSuspends.size() - 1)
std::swap(CoroSuspends[FinalSuspendIndex], CoroSuspends.back());
+
+ // Remove orphaned coro.saves.
+ for (CoroSaveInst *CoroSave : UnusedCoroSaves)
+ CoroSave->eraseFromParent();
}
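
The cleanup above collects orphaned coro.saves during the instruction walk and erases them only after the loop finishes, since erasing mid-iteration would invalidate the traversal. A minimal sketch of that deferred-erase pattern with standard containers (toy Inst type, not the LLVM API):

```cpp
#include <list>
#include <vector>

struct Inst { bool HasUses; };

// Deferred-erase pattern: collect matches during the scan, mutate afterwards,
// so the walk over 'Insts' is never invalidated mid-iteration.
void eraseUnused(std::list<Inst> &Insts) {
  std::vector<std::list<Inst>::iterator> Unused;
  for (auto It = Insts.begin(); It != Insts.end(); ++It)
    if (!It->HasUses)
      Unused.push_back(It);   // remember the orphan now ...
  for (auto It : Unused)
    Insts.erase(It);          // ... and erase it after the walk
}
```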
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 28cc81c76d4fb..5cc29a4937986 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1188,6 +1188,10 @@ static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
SCCNodes.insert(F);
}
+ // Skip it if the SCC only contains optnone functions.
+ if (SCCNodes.empty())
+ return Changed;
+
Changed |= addArgumentReturnedAttrs(SCCNodes);
Changed |= addReadAttrs(SCCNodes, AARGetter);
Changed |= addArgumentAttrs(SCCNodes);
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
index 231487923fada..6d34ab8b0d960 100644
--- a/lib/Transforms/IPO/FunctionImport.cpp
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -292,8 +292,7 @@ static void computeImportForFunction(
static void ComputeImportForModule(
const GVSummaryMapTy &DefinedGVSummaries, const ModuleSummaryIndex &Index,
FunctionImporter::ImportMapTy &ImportList,
- StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr,
- const DenseSet<GlobalValue::GUID> *DeadSymbols = nullptr) {
+ StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) {
// Worklist contains the list of functions imported in this module, for which
// we will analyse the callees and may import further down the callgraph.
SmallVector<EdgeInfo, 128> Worklist;
@@ -301,7 +300,7 @@ static void ComputeImportForModule(
// Populate the worklist with the import for the functions in the current
// module
for (auto &GVSummary : DefinedGVSummaries) {
- if (DeadSymbols && DeadSymbols->count(GVSummary.first)) {
+ if (!Index.isGlobalValueLive(GVSummary.second)) {
DEBUG(dbgs() << "Ignores Dead GUID: " << GVSummary.first << "\n");
continue;
}
@@ -344,15 +343,14 @@ void llvm::ComputeCrossModuleImport(
const ModuleSummaryIndex &Index,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
StringMap<FunctionImporter::ImportMapTy> &ImportLists,
- StringMap<FunctionImporter::ExportSetTy> &ExportLists,
- const DenseSet<GlobalValue::GUID> *DeadSymbols) {
+ StringMap<FunctionImporter::ExportSetTy> &ExportLists) {
// For each module that has function defined, compute the import/export lists.
for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
auto &ImportList = ImportLists[DefinedGVSummaries.first()];
DEBUG(dbgs() << "Computing import for Module '"
<< DefinedGVSummaries.first() << "'\n");
ComputeImportForModule(DefinedGVSummaries.second, Index, ImportList,
- &ExportLists, DeadSymbols);
+ &ExportLists);
}
// When computing imports we added all GUIDs referenced by anything
@@ -414,82 +412,71 @@ void llvm::ComputeCrossModuleImportForModule(
#endif
}
-DenseSet<GlobalValue::GUID> llvm::computeDeadSymbols(
- const ModuleSummaryIndex &Index,
+void llvm::computeDeadSymbols(
+ ModuleSummaryIndex &Index,
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
+ assert(!Index.withGlobalValueDeadStripping());
if (!ComputeDead)
- return DenseSet<GlobalValue::GUID>();
+ return;
if (GUIDPreservedSymbols.empty())
// Don't do anything when nothing is live; this is friendly to tests.
- return DenseSet<GlobalValue::GUID>();
- DenseSet<ValueInfo> LiveSymbols;
+ return;
+ unsigned LiveSymbols = 0;
SmallVector<ValueInfo, 128> Worklist;
Worklist.reserve(GUIDPreservedSymbols.size() * 2);
for (auto GUID : GUIDPreservedSymbols) {
ValueInfo VI = Index.getValueInfo(GUID);
if (!VI)
continue;
- DEBUG(dbgs() << "Live root: " << VI.getGUID() << "\n");
- LiveSymbols.insert(VI);
- Worklist.push_back(VI);
+ for (auto &S : VI.getSummaryList())
+ S->setLive(true);
}
+
// Add values flagged in the index as live roots to the worklist.
- for (const auto &Entry : Index) {
- bool IsLiveRoot = llvm::any_of(
- Entry.second.SummaryList,
- [&](const std::unique_ptr<llvm::GlobalValueSummary> &Summary) {
- return Summary->liveRoot();
- });
- if (!IsLiveRoot)
- continue;
- DEBUG(dbgs() << "Live root (summary): " << Entry.first << "\n");
- Worklist.push_back(ValueInfo(&Entry));
- }
+ for (const auto &Entry : Index)
+ for (auto &S : Entry.second.SummaryList)
+ if (S->isLive()) {
+ DEBUG(dbgs() << "Live root: " << Entry.first << "\n");
+ Worklist.push_back(ValueInfo(&Entry));
+ ++LiveSymbols;
+ break;
+ }
+
+ // Make value live and add it to the worklist if it was not live before.
+ // FIXME: we should only make the prevailing copy live here
+ auto visit = [&](ValueInfo VI) {
+ for (auto &S : VI.getSummaryList())
+ if (S->isLive())
+ return;
+ for (auto &S : VI.getSummaryList())
+ S->setLive(true);
+ ++LiveSymbols;
+ Worklist.push_back(VI);
+ };
while (!Worklist.empty()) {
auto VI = Worklist.pop_back_val();
-
- // FIXME: we should only make the prevailing copy live here
for (auto &Summary : VI.getSummaryList()) {
- for (auto Ref : Summary->refs()) {
- if (LiveSymbols.insert(Ref).second) {
- DEBUG(dbgs() << "Marking live (ref): " << Ref.getGUID() << "\n");
- Worklist.push_back(Ref);
- }
- }
- if (auto *FS = dyn_cast<FunctionSummary>(Summary.get())) {
- for (auto Call : FS->calls()) {
- if (LiveSymbols.insert(Call.first).second) {
- DEBUG(dbgs() << "Marking live (call): " << Call.first.getGUID()
- << "\n");
- Worklist.push_back(Call.first);
- }
- }
- }
+ for (auto Ref : Summary->refs())
+ visit(Ref);
+ if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))
+ for (auto Call : FS->calls())
+ visit(Call.first);
if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
auto AliaseeGUID = AS->getAliasee().getOriginalName();
ValueInfo AliaseeVI = Index.getValueInfo(AliaseeGUID);
- if (AliaseeVI && LiveSymbols.insert(AliaseeVI).second) {
- DEBUG(dbgs() << "Marking live (alias): " << AliaseeGUID << "\n");
- Worklist.push_back(AliaseeVI);
- }
+ if (AliaseeVI)
+ visit(AliaseeVI);
}
}
}
- DenseSet<GlobalValue::GUID> DeadSymbols;
- DeadSymbols.reserve(
- std::min(Index.size(), Index.size() - LiveSymbols.size()));
- for (auto &Entry : Index) {
- if (!LiveSymbols.count(ValueInfo(&Entry))) {
- DEBUG(dbgs() << "Marking dead: " << Entry.first << "\n");
- DeadSymbols.insert(Entry.first);
- }
- }
- DEBUG(dbgs() << LiveSymbols.size() << " symbols Live, and "
- << DeadSymbols.size() << " symbols Dead \n");
- NumDeadSymbols += DeadSymbols.size();
- NumLiveSymbols += LiveSymbols.size();
- return DeadSymbols;
+ Index.setWithGlobalValueDeadStripping();
+
+ unsigned DeadSymbols = Index.size() - LiveSymbols;
+ DEBUG(dbgs() << LiveSymbols << " symbols Live, and " << DeadSymbols
+ << " symbols Dead \n");
+ NumDeadSymbols += DeadSymbols;
+ NumLiveSymbols += LiveSymbols;
}
/// Compute the set of summaries needed for a ThinLTO backend compilation of
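
The computeDeadSymbols rewrite above stores liveness as a flag on each summary instead of materializing Live/Dead sets: preserved roots are seeded, then refs, calls, and aliasees are propagated with a worklist, pushing a value only the first time its flag flips. A minimal sketch of that flag-based propagation over a toy graph (hypothetical Node type, not the ThinLTO summary index):

```cpp
#include <vector>

struct Node {
  bool Live = false;
  std::vector<Node *> Refs; // referenced/called/aliased values
};

// Flag-based worklist propagation: each node is pushed at most once,
// at the moment its Live flag first flips to true.
unsigned markLive(const std::vector<Node *> &Roots) {
  unsigned LiveCount = 0;
  std::vector<Node *> Worklist;
  auto Visit = [&](Node *N) {
    if (N->Live)
      return;                 // already processed
    N->Live = true;
    ++LiveCount;
    Worklist.push_back(N);
  };
  for (Node *R : Roots)
    Visit(R);
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    for (Node *Ref : N->Refs)
      Visit(Ref);
  }
  return LiveCount;           // everything left unmarked is dead
}
```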
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index ca4ee92f971a1..7bec50d9d25f8 100644
--- a/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -1442,9 +1442,8 @@ bool LowerTypeTestsModule::lower() {
for (auto &P : *ExportSummary) {
for (auto &S : P.second.SummaryList) {
auto *FS = dyn_cast<FunctionSummary>(S.get());
- if (!FS)
+ if (!FS || !ExportSummary->isGlobalValueLive(FS))
continue;
- // FIXME: Only add live functions.
for (GlobalValue::GUID G : FS->type_tests())
for (Metadata *MD : MetadataByGUID[G])
AddTypeIdUse(MD).IsExported = true;
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index bc0967448cddf..ea805efc66b79 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -68,6 +68,10 @@ static cl::opt<int>
cl::desc("Relative frequency of outline region to "
"the entry block"));
+static cl::opt<unsigned> ExtraOutliningPenalty(
+ "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
+ cl::desc("A debug option to add additional penalty to the computed one."));
+
namespace {
struct FunctionOutliningInfo {
@@ -83,7 +87,7 @@ struct FunctionOutliningInfo {
SmallVector<BasicBlock *, 4> Entries;
// The return block that is not included in the outlined region.
BasicBlock *ReturnBlock;
- // The dominating block of the region ot be outlined.
+ // The dominating block of the region to be outlined.
BasicBlock *NonReturnBlock;
// The set of blocks in Entries that are predecessors to ReturnBlock
SmallVector<BasicBlock *, 4> ReturnBlockPreds;
@@ -407,11 +411,23 @@ BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
if (hasProfileData(F, OI))
return OutlineRegionRelFreq;
- // When profile data is not available, we need to be very
- // conservative in estimating the overall savings. We need to make sure
- // the outline region relative frequency is not below the threshold
- // specified by the option.
- OutlineRegionRelFreq = std::max(OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
+ // When profile data is not available, we need to be conservative in
+ // estimating the overall savings. Static branch prediction can usually
+ // guess the branch direction right (taken/non-taken), but the guessed
+ // branch probability is usually not biased enough. In case when the
+ // outlined region is predicted to be likely, its probability needs
+ // to be made higher (more biased) to not under-estimate the cost of
+ // function outlining. On the other hand, if the outlined region
+ // is predicted to be less likely, the predicted probability is usually
+ // higher than the actual. For instance, the actual probability of the
+ // less likely target is only 5%, but the guessed probability can be
+ // 40%. In the latter case, there is no need for further adjustment.
+ // FIXME: add an option for this.
+ if (OutlineRegionRelFreq < BranchProbability(45, 100))
+ return OutlineRegionRelFreq;
+
+ OutlineRegionRelFreq = std::max(
+ OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
return OutlineRegionRelFreq;
}
@@ -496,6 +512,26 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
if (isa<DbgInfoIntrinsic>(I))
continue;
+ switch (I->getOpcode()) {
+ case Instruction::BitCast:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::Alloca:
+ continue;
+ case Instruction::GetElementPtr:
+ if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
+ continue;
+ default:
+ break;
+ }
+
+ IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I);
+ if (IntrInst) {
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
+ IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
+ continue;
+ }
+
if (CallInst *CI = dyn_cast<CallInst>(I)) {
InlineCost += getCallsiteCost(CallSite(CI), DL);
continue;
@@ -519,7 +555,13 @@ std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts(
Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction,
BasicBlock *OutliningCallBB) {
// First compute the cost of the outlined region 'OI' in the original
- // function 'F':
+ // function 'F'.
+ // FIXME: The code extractor (outliner) can now do code sinking/hoisting
+ // to reduce outlining cost. The hoisted/sunk code currently does not
+ // incur any runtime cost, so it is still OK to compare the outlined
+ // function cost with that of the outlined region in the original function.
+ // If this ever changes, we will need to introduce a new extractor API
+ // to pass the information.
int OutlinedRegionCost = 0;
for (BasicBlock &BB : *F) {
if (&BB != OI->ReturnBlock &&
@@ -542,8 +584,14 @@ std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts(
assert(OutlinedFunctionCost >= OutlinedRegionCost &&
"Outlined function cost should be no less than the outlined region");
- int OutliningRuntimeOverhead =
- OutliningFuncCallCost + (OutlinedFunctionCost - OutlinedRegionCost);
+ // The code extractor introduces a new root and exit stub blocks with
+ // additional unconditional branches. Those branches will be eliminated
+ // later with bb layout. The cost should be adjusted accordingly:
+ OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;
+
+ int OutliningRuntimeOverhead = OutliningFuncCallCost +
+ (OutlinedFunctionCost - OutlinedRegionCost) +
+ ExtraOutliningPenalty;
return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead,
OutlinedRegionCost);
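
The frequency adjustment above trusts a statically predicted cold region (below 45%) but biases a predicted-likely region up to the OutlineRegionFreqPercent floor, so the savings from outlining are not overstated. A sketch of that clamping rule using plain integer percentages in place of llvm::BranchProbability (hypothetical helper name):

```cpp
#include <algorithm>

// Mirrors the adjustment in getOutliningCallBBRelativeFreq, with integer
// percentages standing in for llvm::BranchProbability.
unsigned adjustOutlineRegionFreq(unsigned GuessedPercent,
                                 unsigned FloorPercent) {
  // A region already predicted cold (< 45%) is trusted as-is: static
  // prediction usually overstates, not understates, the cold side.
  if (GuessedPercent < 45)
    return GuessedPercent;
  // A predicted-likely region is biased further up to the floor
  // (OutlineRegionFreqPercent) so the outlining cost is not underestimated.
  return std::max(GuessedPercent, FloorPercent);
}
```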
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 9fd3a9021a270..16fba32e98056 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -157,7 +157,7 @@ static cl::opt<bool>
static cl::opt<bool> EnableGVNSink(
"enable-gvn-sink", cl::init(false), cl::Hidden,
- cl::desc("Enable the GVN sinking pass (default = on)"));
+ cl::desc("Enable the GVN sinking pass (default = off)"));
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 2c2b7317a1c09..c0798e164c39f 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4508,13 +4508,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Builder->CreateAnd(A, B),
Op1);
- // ~x < ~y --> y < x
- // ~x < cst --> ~cst < x
+ // ~X < ~Y --> Y < X
+ // ~X < C --> X > ~C
if (match(Op0, m_Not(m_Value(A)))) {
if (match(Op1, m_Not(m_Value(B))))
return new ICmpInst(I.getPredicate(), B, A);
- if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1))
- return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
+
+ const APInt *C;
+ if (match(Op1, m_APInt(C)))
+ return new ICmpInst(I.getSwappedPredicate(), A,
+ ConstantInt::get(Op1->getType(), ~(*C)));
}
Instruction *AddI = nullptr;
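
The rewritten fold canonicalizes ~X < C to X > ~C by swapping the predicate, and matching with m_APInt lets it fire on splat vector constants rather than only scalar ConstantInts. The identity holds for every ordering predicate because bitwise NOT reverses both the unsigned and the signed order; a small exhaustive self-check over all 8-bit values (plain C++, independent of the InstCombine API):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (int X = 0; X < 256; ++X)
    for (int C = 0; C < 256; ++C) {
      uint8_t UX = X, UC = C;
      // Unsigned order: ~X < C  <=>  X > ~C (predicate swapped).
      assert(((uint8_t)~UX < UC) == (UX > (uint8_t)~UC));
      // Signed order: the same identity holds, since ~a == -a - 1.
      assert(((int8_t)~UX < (int8_t)UC) == ((int8_t)UX > (int8_t)~UC));
    }
  return 0;
}
```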
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index ff753c20a94af..df4ee9969c02f 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2087,6 +2087,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
switch (I.getNumArgOperands()) {
case 3:
assert(isa<ConstantInt>(I.getArgOperand(2)) && "Invalid rounding mode");
+ LLVM_FALLTHROUGH;
case 2:
CopyOp = I.getArgOperand(0);
ConvertOp = I.getArgOperand(1);
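
This hunk, like several later ones in the commit, adds LLVM_FALLTHROUGH where a case label intentionally falls into the next one; on C++17 compilers the macro expands to the standard [[fallthrough]] attribute, documenting the intent and silencing -Wimplicit-fallthrough without changing behavior. A minimal illustration using the attribute directly (hand-written, not from the patch):

```cpp
// Equivalent of what LLVM_FALLTHROUGH expands to under C++17.
int handledArgs(int NumArgs) {
  int Copied = 0;
  switch (NumArgs) {
  case 3:
    // Validate the extra rounding-mode operand, then treat the call
    // exactly like the two-argument form.
    [[fallthrough]];
  case 2:
    Copied = 2;
    break;
  default:
    break;
  }
  return Copied;
}
```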
diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 325b64cd8b436..8aa40d1759de6 100644
--- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -57,6 +57,11 @@ static const char *const SanCovTracePCGuardName =
"__sanitizer_cov_trace_pc_guard";
static const char *const SanCovTracePCGuardInitName =
"__sanitizer_cov_trace_pc_guard_init";
+static const char *const SanCov8bitCountersInitName =
+ "__sanitizer_cov_8bit_counters_init";
+
+static const char *const SanCovGuardsSectionName = "sancov_guards";
+static const char *const SanCovCountersSectionName = "sancov_counters";
static cl::opt<int> ClCoverageLevel(
"sanitizer-coverage-level",
@@ -64,14 +69,18 @@ static cl::opt<int> ClCoverageLevel(
"3: all blocks and critical edges"),
cl::Hidden, cl::init(0));
-static cl::opt<bool> ClExperimentalTracePC("sanitizer-coverage-trace-pc",
- cl::desc("Experimental pc tracing"),
- cl::Hidden, cl::init(false));
+static cl::opt<bool> ClTracePC("sanitizer-coverage-trace-pc",
+ cl::desc("Experimental pc tracing"), cl::Hidden,
+ cl::init(false));
static cl::opt<bool> ClTracePCGuard("sanitizer-coverage-trace-pc-guard",
cl::desc("pc tracing with a guard"),
cl::Hidden, cl::init(false));
+static cl::opt<bool> ClInline8bitCounters("sanitizer-coverage-inline-8bit-counters",
+ cl::desc("increments 8-bit counter for every edge"),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool>
ClCMPTracing("sanitizer-coverage-trace-compares",
cl::desc("Tracing of CMP and similar instructions"),
@@ -123,9 +132,10 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) {
Options.TraceCmp |= ClCMPTracing;
Options.TraceDiv |= ClDIVTracing;
Options.TraceGep |= ClGEPTracing;
- Options.TracePC |= ClExperimentalTracePC;
+ Options.TracePC |= ClTracePC;
Options.TracePCGuard |= ClTracePCGuard;
- if (!Options.TracePCGuard && !Options.TracePC)
+ Options.Inline8bitCounters |= ClInline8bitCounters;
+ if (!Options.TracePCGuard && !Options.TracePC && !Options.Inline8bitCounters)
Options.TracePCGuard = true; // TracePCGuard is default.
Options.NoPrune |= !ClPruneBlocks;
return Options;
@@ -159,11 +169,22 @@ private:
void InjectTraceForSwitch(Function &F,
ArrayRef<Instruction *> SwitchTraceTargets);
bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks);
- void CreateFunctionGuardArray(size_t NumGuards, Function &F);
+ GlobalVariable *CreateFunctionLocalArrayInSection(size_t NumElements,
+ Function &F, Type *Ty,
+ const char *Section);
+ void CreateFunctionLocalArrays(size_t NumGuards, Function &F);
void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx);
- StringRef getSanCovTracePCGuardSection() const;
- StringRef getSanCovTracePCGuardSectionStart() const;
- StringRef getSanCovTracePCGuardSectionEnd() const;
+ void CreateInitCallForSection(Module &M, const char *InitFunctionName,
+ Type *Ty, const std::string &Section);
+
+ void SetNoSanitizeMetadata(Instruction *I) {
+ I->setMetadata(I->getModule()->getMDKindID("nosanitize"),
+ MDNode::get(*C, None));
+ }
+
+ std::string getSectionName(const std::string &Section) const;
+ std::string getSectionStart(const std::string &Section) const;
+ std::string getSectionEnd(const std::string &Section) const;
Function *SanCovTracePCIndir;
Function *SanCovTracePC, *SanCovTracePCGuard;
Function *SanCovTraceCmpFunction[4];
@@ -171,20 +192,48 @@ private:
Function *SanCovTraceGepFunction;
Function *SanCovTraceSwitchFunction;
InlineAsm *EmptyAsm;
- Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy;
+ Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy,
+ *Int8Ty, *Int8PtrTy;
Module *CurModule;
Triple TargetTriple;
LLVMContext *C;
const DataLayout *DL;
GlobalVariable *FunctionGuardArray; // for trace-pc-guard.
- bool HasSancovGuardsSection;
+ GlobalVariable *Function8bitCounterArray; // for inline-8bit-counters.
SanitizerCoverageOptions Options;
};
} // namespace
+void SanitizerCoverageModule::CreateInitCallForSection(
+ Module &M, const char *InitFunctionName, Type *Ty,
+ const std::string &Section) {
+ IRBuilder<> IRB(M.getContext());
+ Function *CtorFunc;
+ GlobalVariable *SecStart =
+ new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr,
+ getSectionStart(Section));
+ SecStart->setVisibility(GlobalValue::HiddenVisibility);
+ GlobalVariable *SecEnd =
+ new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
+ nullptr, getSectionEnd(Section));
+ SecEnd->setVisibility(GlobalValue::HiddenVisibility);
+
+ std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
+ M, SanCovModuleCtorName, InitFunctionName, {Ty, Ty},
+ {IRB.CreatePointerCast(SecStart, Ty), IRB.CreatePointerCast(SecEnd, Ty)});
+
+ if (TargetTriple.supportsCOMDAT()) {
+ // Use comdat to dedup CtorFunc.
+ CtorFunc->setComdat(M.getOrInsertComdat(SanCovModuleCtorName));
+ appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc);
+ } else {
+ appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
+ }
+}
+
bool SanitizerCoverageModule::runOnModule(Module &M) {
if (Options.CoverageType == SanitizerCoverageOptions::SCK_None)
return false;
@@ -192,15 +241,18 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
DL = &M.getDataLayout();
CurModule = &M;
TargetTriple = Triple(M.getTargetTriple());
- HasSancovGuardsSection = false;
+ FunctionGuardArray = nullptr;
+ Function8bitCounterArray = nullptr;
IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits());
IntptrPtrTy = PointerType::getUnqual(IntptrTy);
Type *VoidTy = Type::getVoidTy(*C);
IRBuilder<> IRB(*C);
Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty());
Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
+ Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty());
Int64Ty = IRB.getInt64Ty();
Int32Ty = IRB.getInt32Ty();
+ Int8Ty = IRB.getInt8Ty();
SanCovTracePCIndir = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy));
@@ -243,34 +295,13 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
for (auto &F : M)
runOnFunction(F);
- // Create variable for module (compilation unit) name
- if (Options.TracePCGuard) {
- if (HasSancovGuardsSection) {
- Function *CtorFunc;
- GlobalVariable *SecStart = new GlobalVariable(
- M, Int32PtrTy, false, GlobalVariable::ExternalLinkage, nullptr,
- getSanCovTracePCGuardSectionStart());
- SecStart->setVisibility(GlobalValue::HiddenVisibility);
- GlobalVariable *SecEnd = new GlobalVariable(
- M, Int32PtrTy, false, GlobalVariable::ExternalLinkage, nullptr,
- getSanCovTracePCGuardSectionEnd());
- SecEnd->setVisibility(GlobalValue::HiddenVisibility);
-
- std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
- M, SanCovModuleCtorName, SanCovTracePCGuardInitName,
- {Int32PtrTy, Int32PtrTy},
- {IRB.CreatePointerCast(SecStart, Int32PtrTy),
- IRB.CreatePointerCast(SecEnd, Int32PtrTy)});
-
- if (TargetTriple.supportsCOMDAT()) {
- // Use comdat to dedup CtorFunc.
- CtorFunc->setComdat(M.getOrInsertComdat(SanCovModuleCtorName));
- appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc);
- } else {
- appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
- }
- }
- }
+ if (FunctionGuardArray)
+ CreateInitCallForSection(M, SanCovTracePCGuardInitName, Int32PtrTy,
+ SanCovGuardsSectionName);
+ if (Function8bitCounterArray)
+ CreateInitCallForSection(M, SanCov8bitCountersInitName, Int8PtrTy,
+ SanCovCountersSectionName);
+
return true;
}
@@ -393,17 +424,26 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {
InjectTraceForGep(F, GepTraceTargets);
return true;
}
-void SanitizerCoverageModule::CreateFunctionGuardArray(size_t NumGuards,
- Function &F) {
- if (!Options.TracePCGuard) return;
- HasSancovGuardsSection = true;
- ArrayType *ArrayOfInt32Ty = ArrayType::get(Int32Ty, NumGuards);
- FunctionGuardArray = new GlobalVariable(
- *CurModule, ArrayOfInt32Ty, false, GlobalVariable::PrivateLinkage,
- Constant::getNullValue(ArrayOfInt32Ty), "__sancov_gen_");
+
+GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection(
+ size_t NumElements, Function &F, Type *Ty, const char *Section) {
+ ArrayType *ArrayTy = ArrayType::get(Ty, NumElements);
+ auto Array = new GlobalVariable(
+ *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage,
+ Constant::getNullValue(ArrayTy), "__sancov_gen_");
if (auto Comdat = F.getComdat())
- FunctionGuardArray->setComdat(Comdat);
- FunctionGuardArray->setSection(getSanCovTracePCGuardSection());
+ Array->setComdat(Comdat);
+ Array->setSection(getSectionName(Section));
+ return Array;
+}
+void SanitizerCoverageModule::CreateFunctionLocalArrays(size_t NumGuards,
+ Function &F) {
+ if (Options.TracePCGuard)
+ FunctionGuardArray = CreateFunctionLocalArrayInSection(
+ NumGuards, F, Int32Ty, SanCovGuardsSectionName);
+ if (Options.Inline8bitCounters)
+ Function8bitCounterArray = CreateFunctionLocalArrayInSection(
+ NumGuards, F, Int8Ty, SanCovCountersSectionName);
}
bool SanitizerCoverageModule::InjectCoverage(Function &F,
@@ -413,11 +453,11 @@ bool SanitizerCoverageModule::InjectCoverage(Function &F,
case SanitizerCoverageOptions::SCK_None:
return false;
case SanitizerCoverageOptions::SCK_Function:
- CreateFunctionGuardArray(1, F);
+ CreateFunctionLocalArrays(1, F);
InjectCoverageAtBlock(F, F.getEntryBlock(), 0);
return true;
default: {
- CreateFunctionGuardArray(AllBlocks.size(), F);
+ CreateFunctionLocalArrays(AllBlocks.size(), F);
for (size_t i = 0, N = AllBlocks.size(); i < N; i++)
InjectCoverageAtBlock(F, *AllBlocks[i], i);
return true;
@@ -436,7 +476,7 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls(
Function &F, ArrayRef<Instruction *> IndirCalls) {
if (IndirCalls.empty())
return;
- assert(Options.TracePC || Options.TracePCGuard);
+ assert(Options.TracePC || Options.TracePCGuard || Options.Inline8bitCounters);
for (auto I : IndirCalls) {
IRBuilder<> IRB(I);
CallSite CS(I);
@@ -564,8 +604,8 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
if (Options.TracePC) {
IRB.CreateCall(SanCovTracePC); // gets the PC using GET_CALLER_PC.
IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
- } else {
- assert(Options.TracePCGuard);
+ }
+ if (Options.TracePCGuard) {
auto GuardPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy),
ConstantInt::get(IntptrTy, Idx * 4)),
@@ -573,26 +613,39 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
IRB.CreateCall(SanCovTracePCGuard, GuardPtr);
IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
}
+ if (Options.Inline8bitCounters) {
+ auto CounterPtr = IRB.CreateGEP(
+ Function8bitCounterArray,
+ {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)});
+ auto Load = IRB.CreateLoad(CounterPtr);
+ auto Inc = IRB.CreateAdd(Load, ConstantInt::get(Int8Ty, 1));
+ auto Store = IRB.CreateStore(Inc, CounterPtr);
+ SetNoSanitizeMetadata(Load);
+ SetNoSanitizeMetadata(Store);
+ }
}
-StringRef SanitizerCoverageModule::getSanCovTracePCGuardSection() const {
+std::string
+SanitizerCoverageModule::getSectionName(const std::string &Section) const {
if (TargetTriple.getObjectFormat() == Triple::COFF)
return ".SCOV$M";
if (TargetTriple.isOSBinFormatMachO())
- return "__DATA,__sancov_guards";
- return "__sancov_guards";
+ return "__DATA,__" + Section;
+ return "__" + Section;
}
-StringRef SanitizerCoverageModule::getSanCovTracePCGuardSectionStart() const {
+std::string
+SanitizerCoverageModule::getSectionStart(const std::string &Section) const {
if (TargetTriple.isOSBinFormatMachO())
- return "\1section$start$__DATA$__sancov_guards";
- return "__start___sancov_guards";
+ return "\1section$start$__DATA$__" + Section;
+ return "__start___" + Section;
}
-StringRef SanitizerCoverageModule::getSanCovTracePCGuardSectionEnd() const {
+std::string
+SanitizerCoverageModule::getSectionEnd(const std::string &Section) const {
if (TargetTriple.isOSBinFormatMachO())
- return "\1section$end$__DATA$__sancov_guards";
- return "__stop___sancov_guards";
+ return "\1section$end$__DATA$__" + Section;
+ return "__stop___" + Section;
}
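
With the new -sanitizer-coverage-inline-8bit-counters mode, each function gets a private array of i8 counters in the sancov_counters section, every instrumented block increments its slot (the load/store pair carries nosanitize metadata so other sanitizers ignore the accesses), and a module constructor hands the section bounds to __sanitizer_cov_8bit_counters_init. A hand-written C++ model of what the instrumented module behaves like (the Counters array stands in for the linker-provided __start/__stop section symbols; this is not compiler output):

```cpp
#include <cstdint>
#include <cstdio>

static uint8_t Counters[3]; // one slot per instrumented block

// Runtime callback named in the patch; here it just reports the bounds.
extern "C" void __sanitizer_cov_8bit_counters_init(uint8_t *Start,
                                                   uint8_t *Stop) {
  std::printf("counters: %zu slots\n", (size_t)(Stop - Start));
}

int instrumented(int X) {
  Counters[0]++;            // entry block
  if (X > 0) {
    Counters[1]++;          // then-block
    X *= 2;
  } else {
    Counters[2]++;          // else-block
  }
  return X;
}

int main() {
  // Module-constructor equivalent: register the section bounds once.
  __sanitizer_cov_8bit_counters_init(Counters, Counters + 3);
  instrumented(1);
  instrumented(-1);
}
```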
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 3953198fe6052..9a7882211bac6 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1823,6 +1823,7 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
// An IV counter must preserve its type.
if (IncI->getNumOperands() == 2)
break;
+ LLVM_FALLTHROUGH;
default:
return nullptr;
}
diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 930696b036c00..7d8da9b453f9f 100644
--- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -83,6 +84,149 @@ static bool handleSwitchExpect(SwitchInst &SI) {
return true;
}
+/// Handler for PHINodes that define the value argument to an
+/// @llvm.expect call.
+///
+/// If an operand of the phi has a constant value that 'contradicts'
+/// the expected value of the phi def, then the corresponding incoming
+/// edge of the phi is unlikely to be taken. Using that information,
+/// the branch probability info for the originating branch can be inferred.
+static void handlePhiDef(CallInst *Expect) {
+ Value &Arg = *Expect->getArgOperand(0);
+ ConstantInt *ExpectedValue = cast<ConstantInt>(Expect->getArgOperand(1));
+ const APInt &ExpectedPhiValue = ExpectedValue->getValue();
+
+ // Walk backward up the chain of instructions that
+ // have 'copy' semantics, 'stripping' the copies
+ // until a PHI node or an instruction of unknown kind
+ // is reached. Negation via xor is also handled.
+ //
+ // C = PHI(...);
+ // B = C;
+ // A = B;
+ // D = __builtin_expect(A, 0);
+ //
+ Value *V = &Arg;
+ SmallVector<Instruction *, 4> Operations;
+ while (!isa<PHINode>(V)) {
+ if (ZExtInst *ZExt = dyn_cast<ZExtInst>(V)) {
+ V = ZExt->getOperand(0);
+ Operations.push_back(ZExt);
+ continue;
+ }
+
+ if (SExtInst *SExt = dyn_cast<SExtInst>(V)) {
+ V = SExt->getOperand(0);
+ Operations.push_back(SExt);
+ continue;
+ }
+
+ BinaryOperator *BinOp = dyn_cast<BinaryOperator>(V);
+ if (!BinOp || BinOp->getOpcode() != Instruction::Xor)
+ return;
+
+ ConstantInt *CInt = dyn_cast<ConstantInt>(BinOp->getOperand(1));
+ if (!CInt)
+ return;
+
+ V = BinOp->getOperand(0);
+ Operations.push_back(BinOp);
+ }
+
+ // Executes the recorded operations on input 'Value'.
+ auto ApplyOperations = [&](const APInt &Value) {
+ APInt Result = Value;
+ for (auto Op : llvm::reverse(Operations)) {
+ switch (Op->getOpcode()) {
+ case Instruction::Xor:
+ Result ^= cast<ConstantInt>(Op->getOperand(1))->getValue();
+ break;
+ case Instruction::ZExt:
+ Result = Result.zext(Op->getType()->getIntegerBitWidth());
+ break;
+ case Instruction::SExt:
+ Result = Result.sext(Op->getType()->getIntegerBitWidth());
+ break;
+ default:
+ llvm_unreachable("Unexpected operation");
+ }
+ }
+ return Result;
+ };
+
+ auto *PhiDef = dyn_cast<PHINode>(V);
+
+ // Get the first dominating conditional branch of the operand
+ // i's incoming block.
+ auto GetDomConditional = [&](unsigned i) -> BranchInst * {
+ BasicBlock *BB = PhiDef->getIncomingBlock(i);
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (BI && BI->isConditional())
+ return BI;
+ BB = BB->getSinglePredecessor();
+ if (!BB)
+ return nullptr;
+ BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || BI->isUnconditional())
+ return nullptr;
+ return BI;
+ };
+
+ // Now walk through all Phi operands to find phi operands with values
+ // conflicting with the expected phi output value. Any such operand
+ // indicates the incoming edge to that operand is unlikely.
+ for (unsigned i = 0, e = PhiDef->getNumIncomingValues(); i != e; ++i) {
+
+ Value *PhiOpnd = PhiDef->getIncomingValue(i);
+ ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
+ if (!CI)
+ continue;
+
+ // Not an interesting case -- we cannot infer anything useful when the
+ // operand value matches the expected phi output.
+ if (ExpectedPhiValue == ApplyOperations(CI->getValue()))
+ continue;
+
+ BranchInst *BI = GetDomConditional(i);
+ if (!BI)
+ continue;
+
+ MDBuilder MDB(PhiDef->getContext());
+
+ // There are two situations in which an operand of the PhiDef comes
+ // from a given successor of a branch instruction BI.
+ // 1) When the incoming block of the operand is the successor block;
+ // 2) When the incoming block is BI's enclosing block and the
+ // successor is the PhiDef's enclosing block.
+ //
+ // Returns true if the operand which comes from OpndIncomingBB
+ // comes from the outgoing edge of BI that leads to the Succ block.
+ auto *OpndIncomingBB = PhiDef->getIncomingBlock(i);
+ auto IsOpndComingFromSuccessor = [&](BasicBlock *Succ) {
+ if (OpndIncomingBB == Succ)
+ // If this successor is the incoming block for this
+ // Phi operand, then this successor does lead to the Phi.
+ return true;
+ if (OpndIncomingBB == BI->getParent() && Succ == PhiDef->getParent())
+ // Otherwise, if the edge is directly from the branch
+ // to the Phi, this successor is the one feeding this
+ // Phi operand.
+ return true;
+ return false;
+ };
+
+ if (IsOpndComingFromSuccessor(BI->getSuccessor(1)))
+ BI->setMetadata(
+ LLVMContext::MD_prof,
+ MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight));
+ else if (IsOpndComingFromSuccessor(BI->getSuccessor(0)))
+ BI->setMetadata(
+ LLVMContext::MD_prof,
+ MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight));
+ }
+}
+
// Handle both BranchInst and SelectInst.
template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
@@ -99,25 +243,31 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
ICmpInst *CmpI = dyn_cast<ICmpInst>(BSI.getCondition());
CmpInst::Predicate Predicate;
- uint64_t ValueComparedTo = 0;
+ ConstantInt *CmpConstOperand = nullptr;
if (!CmpI) {
CI = dyn_cast<CallInst>(BSI.getCondition());
Predicate = CmpInst::ICMP_NE;
- ValueComparedTo = 0;
} else {
Predicate = CmpI->getPredicate();
if (Predicate != CmpInst::ICMP_NE && Predicate != CmpInst::ICMP_EQ)
return false;
- ConstantInt *CmpConstOperand = dyn_cast<ConstantInt>(CmpI->getOperand(1));
+
+ CmpConstOperand = dyn_cast<ConstantInt>(CmpI->getOperand(1));
if (!CmpConstOperand)
return false;
- ValueComparedTo = CmpConstOperand->getZExtValue();
CI = dyn_cast<CallInst>(CmpI->getOperand(0));
}
if (!CI)
return false;
+ uint64_t ValueComparedTo = 0;
+ if (CmpConstOperand) {
+ if (CmpConstOperand->getBitWidth() > 64)
+ return false;
+ ValueComparedTo = CmpConstOperand->getZExtValue();
+ }
+
Function *Fn = CI->getCalledFunction();
if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
return false;
@@ -181,6 +331,10 @@ static bool lowerExpectIntrinsic(Function &F) {
Function *Fn = CI->getCalledFunction();
if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) {
+ // Before erasing the llvm.expect, walk backward to find the
+ // phi that defines llvm.expect's first argument, and
+ // infer branch probabilities from it:
+ handlePhiDef(CI);
Value *Exp = CI->getArgOperand(0);
CI->replaceAllUsesWith(Exp);
CI->eraseFromParent();
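
handlePhiDef above lets the lowering reason through a phi that feeds llvm.expect: each incoming constant is replayed through the recorded zext/sext/xor 'copies', and a constant that disagrees with the expected value marks its incoming edge unlikely, attaching branch weights to the dominating conditional branch. In source terms, the pass can now annotate the first branch in a pattern like this hand-written illustration (do_lookup and handle_miss are hypothetical; __builtin_expect is the GCC/Clang builtin that lowers to llvm.expect):

```cpp
extern int do_lookup(int Key);
extern void handle_miss();

// 'Ret' reaches __builtin_expect through a phi: one arm contributes the
// constant -1, the other the call result. The -1 contradicts the expected
// value 0, so the 'Key < 0' edge is marked unlikely and the first branch
// receives explicit branch weights.
int lookup(int Key) {
  int Ret;
  if (Key < 0)
    Ret = -1;
  else
    Ret = do_lookup(Key);
  if (__builtin_expect(Ret, 0) != 0)
    handle_miss();
  return Ret;
}
```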
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 77b2bd84f9b61..350b50ffcdd41 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// Rewrite an existing set of gc.statepoints such that they make potential
-// relocations performed by the garbage collector explicit in the IR.
+// Rewrite call/invoke instructions so as to make potential relocations
+// performed by the garbage collector explicit in the IR.
//
//===----------------------------------------------------------------------===//
@@ -2094,9 +2094,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// live in the IR. We'll remove all of these when done.
SmallVector<CallInst *, 64> Holders;
- // Insert a dummy call with all of the arguments to the vm_state we'll need
- // for the actual safepoint insertion. This ensures reference arguments in
- // the deopt argument list are considered live through the safepoint (and
+ // Insert a dummy call with all of the deopt operands we'll need for the
+ // actual safepoint insertion as arguments. This ensures reference operands
+ // in the deopt argument list are considered live through the safepoint (and
// thus makes sure they get relocated.)
for (CallSite CS : ToUpdate) {
SmallVector<Value *, 64> DeoptValues;
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 6e113bccff94c..fb1b5813fd79f 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -3698,7 +3698,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
int Idx = 0, Size = Offsets.Splits.size();
for (;;) {
auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
- auto *PartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace());
+ auto *LoadPartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace());
+ auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace());
// Either lookup a split load or create one.
LoadInst *PLoad;
@@ -3709,7 +3710,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
PLoad = IRB.CreateAlignedLoad(
getAdjustedPtr(IRB, DL, LoadBasePtr,
APInt(DL.getPointerSizeInBits(), PartOffset),
- PartPtrTy, LoadBasePtr->getName() + "."),
+ LoadPartPtrTy, LoadBasePtr->getName() + "."),
getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
}
@@ -3719,7 +3720,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
StoreInst *PStore = IRB.CreateAlignedStore(
PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
APInt(DL.getPointerSizeInBits(), PartOffset),
- PartPtrTy, StoreBasePtr->getName() + "."),
+ StorePartPtrTy, StoreBasePtr->getName() + "."),
getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
// Now build a new slice for the alloca.
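
The SROA fix above matters when a presplit load/store pair straddles address spaces: split load pointers must be built from the load's address space and split store pointers from the store's, where previously both used the store's. A Clang-extension sketch of source that yields such a cross-address-space pair (address_space is a Clang attribute; illustration only, not a test from the patch):

```cpp
// AS1 pointers and default-address-space pointers are distinct types, so
// any split of this copy must derive the load-side pointer type from Src
// and the store-side pointer type from Dst.
typedef __attribute__((address_space(1))) char AS1Char;

void copyToAS1(AS1Char *Dst, const char *Src, int N) {
  for (int I = 0; I < N; ++I)
    Dst[I] = Src[I]; // load from AS 0, store into AS 1
}
```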
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 1ec3d0d496374..1c1a75c111e96 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -37,10 +37,10 @@
using namespace llvm;
/// See comments in Cloning.h.
-BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
- ValueToValueMapTy &VMap,
+BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
const Twine &NameSuffix, Function *F,
- ClonedCodeInfo *CodeInfo) {
+ ClonedCodeInfo *CodeInfo,
+ DebugInfoFinder *DIFinder) {
DenseMap<const MDNode *, MDNode *> Cache;
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
@@ -50,10 +50,11 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
// Loop over all instructions, and copy them over.
for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
II != IE; ++II) {
+
+ if (DIFinder && F->getParent() && II->getDebugLoc())
+ DIFinder->processLocation(*F->getParent(), II->getDebugLoc().get());
+
Instruction *NewInst = II->clone();
- if (F && F->getSubprogram())
- DebugLoc::reparentDebugInfo(*NewInst, BB->getParent()->getSubprogram(),
- F->getSubprogram(), Cache);
if (II->hasName())
NewInst->setName(II->getName()+NameSuffix);
NewBB->getInstList().push_back(NewInst);
@@ -122,31 +123,38 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttributes(),
OldAttrs.getRetAttributes(), NewArgAttrs));
+ bool MustCloneSP =
+ OldFunc->getParent() && OldFunc->getParent() == NewFunc->getParent();
+ DISubprogram *SP = OldFunc->getSubprogram();
+ if (SP) {
+ assert(!MustCloneSP || ModuleLevelChanges);
+ // Add mappings for some DebugInfo nodes that we don't want duplicated
+ // even if they're distinct.
+ auto &MD = VMap.MD();
+ MD[SP->getUnit()].reset(SP->getUnit());
+ MD[SP->getType()].reset(SP->getType());
+ MD[SP->getFile()].reset(SP->getFile());
+ // If we're not cloning into the same module, no need to clone the
+ // subprogram
+ if (!MustCloneSP)
+ MD[SP].reset(SP);
+ }
+
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
OldFunc->getAllMetadata(MDs);
for (auto MD : MDs) {
- MDNode *NewMD;
- bool MustCloneSP =
- (MD.first == LLVMContext::MD_dbg && OldFunc->getParent() &&
- OldFunc->getParent() == NewFunc->getParent());
- if (MustCloneSP) {
- auto *SP = cast<DISubprogram>(MD.second);
- NewMD = DISubprogram::getDistinct(
- NewFunc->getContext(), SP->getScope(), SP->getName(),
- SP->getLinkageName(), SP->getFile(), SP->getLine(), SP->getType(),
- SP->isLocalToUnit(), SP->isDefinition(), SP->getScopeLine(),
- SP->getContainingType(), SP->getVirtuality(), SP->getVirtualIndex(),
- SP->getThisAdjustment(), SP->getFlags(), SP->isOptimized(),
- SP->getUnit(), SP->getTemplateParams(), SP->getDeclaration(),
- SP->getVariables(), SP->getThrownTypes());
- } else
- NewMD =
- MapMetadata(MD.second, VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
- TypeMapper, Materializer);
- NewFunc->addMetadata(MD.first, *NewMD);
+ NewFunc->addMetadata(
+ MD.first,
+ *MapMetadata(MD.second, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer));
}
+ // When we remap instructions, we want to avoid duplicating inlined
+ // DISubprograms, so record all subprograms we find as we duplicate
+ // instructions and then freeze them in the MD map.
+ DebugInfoFinder DIFinder;
+
// Loop over all of the basic blocks in the function, cloning them as
// appropriate. Note that we save BE this way in order to handle cloning of
// recursive functions into themselves.
@@ -156,7 +164,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
const BasicBlock &BB = *BI;
// Create a new basic block and copy instructions into it!
- BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo);
+ BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo,
+ SP ? &DIFinder : nullptr);
// Add basic block mapping.
VMap[&BB] = CBB;
@@ -178,6 +187,12 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
Returns.push_back(RI);
}
+ for (DISubprogram *ISP : DIFinder.subprograms()) {
+ if (ISP != SP) {
+ VMap.MD()[ISP].reset(ISP);
+ }
+ }
+
// Loop over all of the instructions in the function, fixing up operand
// references as we go. This uses VMap to do all the hard work.
for (Function::iterator BB =
@@ -226,7 +241,7 @@ Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap,
}
SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(NewF, F, VMap, /*ModuleLevelChanges=*/false, Returns, "",
+ CloneFunctionInto(NewF, F, VMap, F->getSubprogram() != nullptr, Returns, "",
CodeInfo);
return NewF;
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8b9a64c220ccd..799eef21dc4ea 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4779,6 +4779,7 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
scalarizeInstruction(&I, true);
break;
}
+ LLVM_FALLTHROUGH;
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@@ -7396,6 +7397,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
// likely.
return Cost / getReciprocalPredBlockProb();
}
+ LLVM_FALLTHROUGH;
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e6f78e6b94a34..d1349535f2982 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -259,6 +259,7 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
if (hasVectorInstrinsicScalarOpd(ID, 1)) {
return (CI->getArgOperand(1) == Scalar);
}
+ LLVM_FALLTHROUGH;
}
default:
return false;
@@ -4749,56 +4750,18 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
return nullptr;
}
-namespace {
-/// Tracks instructons and its children.
-class WeakTrackingVHWithLevel final : public CallbackVH {
- /// Operand index of the instruction currently beeing analized.
- unsigned Level = 0;
- /// Is this the instruction that should be vectorized, or are we now
- /// processing children (i.e. operands of this instruction) for potential
- /// vectorization?
- bool IsInitial = true;
-
-public:
- explicit WeakTrackingVHWithLevel() = default;
- WeakTrackingVHWithLevel(Value *V) : CallbackVH(V){};
- /// Restart children analysis each time it is repaced by the new instruction.
- void allUsesReplacedWith(Value *New) override {
- setValPtr(New);
- Level = 0;
- IsInitial = true;
- }
- /// Check if the instruction was not deleted during vectorization.
- bool isValid() const { return !getValPtr(); }
- /// Is the istruction itself must be vectorized?
- bool isInitial() const { return IsInitial; }
- /// Try to vectorize children.
- void clearInitial() { IsInitial = false; }
- /// Are all children processed already?
- bool isFinal() const {
- assert(getValPtr() &&
- (isa<Instruction>(getValPtr()) &&
- cast<Instruction>(getValPtr())->getNumOperands() >= Level));
- return getValPtr() &&
- cast<Instruction>(getValPtr())->getNumOperands() == Level;
- }
- /// Get next child operation.
- Value *nextOperand() {
- assert(getValPtr() && isa<Instruction>(getValPtr()) &&
- cast<Instruction>(getValPtr())->getNumOperands() > Level);
- return cast<Instruction>(getValPtr())->getOperand(Level++);
- }
- virtual ~WeakTrackingVHWithLevel() = default;
-};
-} // namespace
-
-/// \brief Attempt to reduce a horizontal reduction.
-/// If it is legal to match a horizontal reduction feeding
-/// the phi node P with reduction operators Root in a basic block BB, then check
-/// if it can be done.
-/// \returns true if a horizontal reduction was matched and reduced.
-/// \returns false if a horizontal reduction was not matched.
-static bool canBeVectorized(
+/// Attempt to reduce a horizontal reduction.
+/// If it is legal to match a horizontal reduction feeding the phi node \a P
+/// with reduction operators \a Root (or one of its operands) in a basic block
+/// \a BB, then check if it can be done. If horizontal reduction is not found
+/// and root instruction is a binary operation, vectorization of the operands is
+/// attempted.
+/// \returns true if a horizontal reduction was matched and reduced or operands
+/// of one of the binary instruction were vectorized.
+/// \returns false if a horizontal reduction was not matched (or not possible)
+/// or no vectorization of any binary operation feeding \a Root instruction was
+/// performed.
+static bool tryToVectorizeHorReductionOrInstOperands(
PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R,
TargetTransformInfo *TTI,
const function_ref<bool(BinaryOperator *, BoUpSLP &)> Vectorize) {
@@ -4810,56 +4773,62 @@ static bool canBeVectorized(
if (Root->getParent() != BB)
return false;
- SmallVector<WeakTrackingVHWithLevel, 8> Stack(1, Root);
+ // Start analysis starting from Root instruction. If horizontal reduction is
+ // found, try to vectorize it. If it is not a horizontal reduction or
+ // vectorization is not possible or not effective, and currently analyzed
+ // instruction is a binary operation, try to vectorize the operands, using
+ // pre-order DFS traversal order. If the operands were not vectorized, repeat
+ // the same procedure considering each operand as a possible root of the
+ // horizontal reduction.
+ // Interrupt the process if the Root instruction itself was vectorized or all
+ // sub-trees not higher than RecursionMaxDepth were analyzed/vectorized.
+ SmallVector<std::pair<WeakTrackingVH, unsigned>, 8> Stack(1, {Root, 0});
SmallSet<Value *, 8> VisitedInstrs;
bool Res = false;
while (!Stack.empty()) {
- Value *V = Stack.back();
- if (!V) {
- Stack.pop_back();
+ Value *V;
+ unsigned Level;
+ std::tie(V, Level) = Stack.pop_back_val();
+ if (!V)
continue;
- }
auto *Inst = dyn_cast<Instruction>(V);
- if (!Inst || isa<PHINode>(Inst)) {
- Stack.pop_back();
+ if (!Inst || isa<PHINode>(Inst))
continue;
- }
- if (Stack.back().isInitial()) {
- Stack.back().clearInitial();
- if (auto *BI = dyn_cast<BinaryOperator>(Inst)) {
- HorizontalReduction HorRdx;
- if (HorRdx.matchAssociativeReduction(P, BI)) {
- if (HorRdx.tryToReduce(R, TTI)) {
- Res = true;
- P = nullptr;
- continue;
- }
- }
- if (P) {
- Inst = dyn_cast<Instruction>(BI->getOperand(0));
- if (Inst == P)
- Inst = dyn_cast<Instruction>(BI->getOperand(1));
- if (!Inst) {
- P = nullptr;
- continue;
- }
+ if (auto *BI = dyn_cast<BinaryOperator>(Inst)) {
+ HorizontalReduction HorRdx;
+ if (HorRdx.matchAssociativeReduction(P, BI)) {
+ if (HorRdx.tryToReduce(R, TTI)) {
+ Res = true;
+ // Set P to nullptr to avoid re-analysis of phi node in
+ // matchAssociativeReduction function unless this is the root node.
+ P = nullptr;
+ continue;
}
}
- P = nullptr;
- if (Vectorize(dyn_cast<BinaryOperator>(Inst), R)) {
- Res = true;
- continue;
+ if (P) {
+ Inst = dyn_cast<Instruction>(BI->getOperand(0));
+ if (Inst == P)
+ Inst = dyn_cast<Instruction>(BI->getOperand(1));
+ if (!Inst) {
+ // Set P to nullptr to avoid re-analysis of phi node in
+ // matchAssociativeReduction function unless this is the root node.
+ P = nullptr;
+ continue;
+ }
}
}
- if (Stack.back().isFinal()) {
- Stack.pop_back();
+ // Set P to nullptr to avoid re-analysis of phi node in
+ // matchAssociativeReduction function unless this is the root node.
+ P = nullptr;
+ if (Vectorize(dyn_cast<BinaryOperator>(Inst), R)) {
+ Res = true;
continue;
}
- if (auto *NextV = dyn_cast<Instruction>(Stack.back().nextOperand()))
- if (NextV->getParent() == BB && VisitedInstrs.insert(NextV).second &&
- Stack.size() < RecursionMaxDepth)
- Stack.push_back(NextV);
+ // Try to vectorize operands.
+ if (++Level < RecursionMaxDepth)
+ for (auto *Op : Inst->operand_values())
+ Stack.emplace_back(Op, Level);
}
return Res;
}
@@ -4876,10 +4845,10 @@ bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
if (!isa<BinaryOperator>(I))
P = nullptr;
// Try to match and vectorize a horizontal reduction.
- return canBeVectorized(P, I, BB, R, TTI,
- [this](BinaryOperator *BI, BoUpSLP &R) -> bool {
- return tryToVectorize(BI, R);
- });
+ return tryToVectorizeHorReductionOrInstOperands(
+ P, I, BB, R, TTI, [this](BinaryOperator *BI, BoUpSLP &R) -> bool {
+ return tryToVectorize(BI, R);
+ });
}
bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
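
The SLPVectorizer rewrite above replaces the WeakTrackingVHWithLevel callback handle with a plain stack of (value, level) pairs: pop a node, try to reduce or vectorize it, and only on failure push its operands one level deeper, bounded by RecursionMaxDepth. The traversal skeleton with the SLP specifics stripped out (toy Node type and hypothetical names; assumes C++17):

```cpp
#include <utility>
#include <vector>

struct Node {
  std::vector<Node *> Operands;
};

// Pre-order DFS over the operand tree with an explicit (node, depth) stack,
// capped at MaxDepth; TryVectorize stands in for the reduction attempt.
template <typename Fn>
bool visitOperandTree(Node *Root, unsigned MaxDepth, Fn TryVectorize) {
  bool Changed = false;
  std::vector<std::pair<Node *, unsigned>> Stack{{Root, 0u}};
  while (!Stack.empty()) {
    auto [N, Level] = Stack.back();
    Stack.pop_back();
    if (!N)
      continue;               // the value may have been deleted meanwhile
    if (TryVectorize(N)) {    // success: do not descend into operands
      Changed = true;
      continue;
    }
    if (++Level < MaxDepth)   // otherwise recurse one level deeper
      for (Node *Op : N->Operands)
        Stack.emplace_back(Op, Level);
  }
  return Changed;
}
```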