Diffstat (limited to 'lib/Analysis')
-rw-r--r-- lib/Analysis/AliasAnalysis.cpp | 136
-rw-r--r-- lib/Analysis/AliasAnalysisEvaluator.cpp | 7
-rw-r--r-- lib/Analysis/AliasAnalysisSummary.cpp | 18
-rw-r--r-- lib/Analysis/AliasAnalysisSummary.h | 22
-rw-r--r-- lib/Analysis/AliasSetTracker.cpp | 131
-rw-r--r-- lib/Analysis/Analysis.cpp | 7
-rw-r--r-- lib/Analysis/AssumptionCache.cpp | 35
-rw-r--r-- lib/Analysis/BasicAliasAnalysis.cpp | 239
-rw-r--r-- lib/Analysis/BlockFrequencyInfo.cpp | 12
-rw-r--r-- lib/Analysis/BlockFrequencyInfoImpl.cpp | 18
-rw-r--r-- lib/Analysis/BranchProbabilityInfo.cpp | 15
-rw-r--r-- lib/Analysis/CFG.cpp | 83
-rw-r--r-- lib/Analysis/CFGPrinter.cpp | 7
-rw-r--r-- lib/Analysis/CFLAndersAliasAnalysis.cpp | 16
-rw-r--r-- lib/Analysis/CFLGraph.h | 68
-rw-r--r-- lib/Analysis/CFLSteensAliasAnalysis.cpp | 7
-rw-r--r-- lib/Analysis/CGSCCPassManager.cpp | 13
-rw-r--r-- lib/Analysis/CallGraph.cpp | 32
-rw-r--r-- lib/Analysis/CallGraphSCCPass.cpp | 94
-rw-r--r-- lib/Analysis/CallPrinter.cpp | 7
-rw-r--r-- lib/Analysis/CaptureTracking.cpp | 39
-rw-r--r-- lib/Analysis/CmpInstAnalysis.cpp | 7
-rw-r--r-- lib/Analysis/CodeMetrics.cpp | 18
-rw-r--r-- lib/Analysis/ConstantFolding.cpp | 1099
-rw-r--r-- lib/Analysis/CostModel.cpp | 7
-rw-r--r-- lib/Analysis/Delinearization.cpp | 7
-rw-r--r-- lib/Analysis/DemandedBits.cpp | 35
-rw-r--r-- lib/Analysis/DependenceAnalysis.cpp | 51
-rw-r--r-- lib/Analysis/DivergenceAnalysis.cpp | 7
-rw-r--r-- lib/Analysis/DomPrinter.cpp | 7
-rw-r--r-- lib/Analysis/DomTreeUpdater.cpp | 533
-rw-r--r-- lib/Analysis/DominanceFrontier.cpp | 7
-rw-r--r-- lib/Analysis/EHPersonalities.cpp | 7
-rw-r--r-- lib/Analysis/GlobalsModRef.cpp | 39
-rw-r--r-- lib/Analysis/GuardUtils.cpp | 36
-rw-r--r-- lib/Analysis/IVDescriptors.cpp | 33
-rw-r--r-- lib/Analysis/IVUsers.cpp | 7
-rw-r--r-- lib/Analysis/IndirectCallPromotionAnalysis.cpp | 7
-rw-r--r-- lib/Analysis/InlineCost.cpp | 424
-rw-r--r-- lib/Analysis/InstCount.cpp | 7
-rw-r--r-- lib/Analysis/InstructionPrecedenceTracking.cpp | 11
-rw-r--r-- lib/Analysis/InstructionSimplify.cpp | 713
-rw-r--r-- lib/Analysis/Interval.cpp | 7
-rw-r--r-- lib/Analysis/IntervalPartition.cpp | 7
-rw-r--r-- lib/Analysis/IteratedDominanceFrontier.cpp | 110
-rw-r--r-- lib/Analysis/LazyBlockFrequencyInfo.cpp | 7
-rw-r--r-- lib/Analysis/LazyBranchProbabilityInfo.cpp | 7
-rw-r--r-- lib/Analysis/LazyCallGraph.cpp | 20
-rw-r--r-- lib/Analysis/LazyValueInfo.cpp | 192
-rw-r--r-- lib/Analysis/LegacyDivergenceAnalysis.cpp | 7
-rw-r--r-- lib/Analysis/Lint.cpp | 15
-rw-r--r-- lib/Analysis/Loads.cpp | 44
-rw-r--r-- lib/Analysis/LoopAccessAnalysis.cpp | 94
-rw-r--r-- lib/Analysis/LoopAnalysisManager.cpp | 14
-rw-r--r-- lib/Analysis/LoopInfo.cpp | 353
-rw-r--r-- lib/Analysis/LoopPass.cpp | 20
-rw-r--r-- lib/Analysis/LoopUnrollAnalyzer.cpp | 7
-rw-r--r-- lib/Analysis/MemDepPrinter.cpp | 7
-rw-r--r-- lib/Analysis/MemDerefPrinter.cpp | 12
-rw-r--r-- lib/Analysis/MemoryBuiltins.cpp | 137
-rw-r--r-- lib/Analysis/MemoryDependenceAnalysis.cpp | 42
-rw-r--r-- lib/Analysis/MemoryLocation.cpp | 7
-rw-r--r-- lib/Analysis/MemorySSA.cpp | 315
-rw-r--r-- lib/Analysis/MemorySSAUpdater.cpp | 239
-rw-r--r-- lib/Analysis/ModuleDebugInfoPrinter.cpp | 7
-rw-r--r-- lib/Analysis/ModuleSummaryAnalysis.cpp | 276
-rw-r--r-- lib/Analysis/MustExecute.cpp | 16
-rw-r--r-- lib/Analysis/ObjCARCAliasAnalysis.cpp | 32
-rw-r--r-- lib/Analysis/ObjCARCAnalysisUtils.cpp | 7
-rw-r--r-- lib/Analysis/ObjCARCInstKind.cpp | 42
-rw-r--r-- lib/Analysis/OptimizationRemarkEmitter.cpp | 7
-rw-r--r-- lib/Analysis/OrderedBasicBlock.cpp | 31
-rw-r--r-- lib/Analysis/OrderedInstructions.cpp | 7
-rw-r--r-- lib/Analysis/PHITransAddr.cpp | 7
-rw-r--r-- lib/Analysis/PhiValues.cpp | 7
-rw-r--r-- lib/Analysis/PostDominators.cpp | 7
-rw-r--r-- lib/Analysis/ProfileSummaryInfo.cpp | 26
-rw-r--r-- lib/Analysis/PtrUseVisitor.cpp | 15
-rw-r--r-- lib/Analysis/RegionInfo.cpp | 7
-rw-r--r-- lib/Analysis/RegionPass.cpp | 16
-rw-r--r-- lib/Analysis/RegionPrinter.cpp | 7
-rw-r--r-- lib/Analysis/ScalarEvolution.cpp | 794
-rw-r--r-- lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 14
-rw-r--r-- lib/Analysis/ScalarEvolutionExpander.cpp | 267
-rw-r--r-- lib/Analysis/ScalarEvolutionNormalization.cpp | 7
-rw-r--r-- lib/Analysis/ScopedNoAliasAA.cpp | 28
-rw-r--r-- lib/Analysis/StackSafetyAnalysis.cpp | 11
-rw-r--r-- lib/Analysis/StratifiedSets.h | 7
-rw-r--r-- lib/Analysis/SyncDependenceAnalysis.cpp | 35
-rw-r--r-- lib/Analysis/SyntheticCountsUtils.cpp | 7
-rw-r--r-- lib/Analysis/TargetLibraryInfo.cpp | 431
-rw-r--r-- lib/Analysis/TargetTransformInfo.cpp | 184
-rw-r--r-- lib/Analysis/Trace.cpp | 7
-rw-r--r-- lib/Analysis/TypeBasedAliasAnalysis.cpp | 35
-rw-r--r-- lib/Analysis/TypeMetadataUtils.cpp | 7
-rw-r--r-- lib/Analysis/ValueLattice.cpp | 7
-rw-r--r-- lib/Analysis/ValueLatticeUtils.cpp | 7
-rw-r--r-- lib/Analysis/ValueTracking.cpp | 1204
-rw-r--r-- lib/Analysis/VectorUtils.cpp | 148
99 files changed, 5867 insertions(+), 3581 deletions(-)
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 3446aef39938..32241e355eb8 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -1,9 +1,8 @@
//==- AliasAnalysis.cpp - Generic Alias Analysis Interface Implementation --==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -80,12 +79,16 @@ AAResults::~AAResults() {
bool AAResults::invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv) {
- // Check if the AA manager itself has been invalidated.
+ // AAResults preserves the AAManager by default, due to the stateless nature
+ // of AliasAnalysis. There is no need to check whether it has been preserved
+ // explicitly. Check if any module dependency was invalidated and caused the
+ // AAManager to be invalidated. Invalidate ourselves in that case.
auto PAC = PA.getChecker<AAManager>();
- if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
- return true; // The manager needs to be blown away, clear everything.
+ if (!PAC.preservedWhenStateless())
+ return true;
- // Check all of the dependencies registered.
+ // Check if any of the function dependencies were invalidated, and invalidate
+ // ourselves in that case.
for (AnalysisKey *ID : AADeps)
if (Inv.invalidate(ID, F, PA))
return true;
@@ -100,8 +103,14 @@ bool AAResults::invalidate(Function &F, const PreservedAnalyses &PA,
AliasResult AAResults::alias(const MemoryLocation &LocA,
const MemoryLocation &LocB) {
+ AAQueryInfo AAQIP;
+ return alias(LocA, LocB, AAQIP);
+}
+
+AliasResult AAResults::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB, AAQueryInfo &AAQI) {
for (const auto &AA : AAs) {
- auto Result = AA->alias(LocA, LocB);
+ auto Result = AA->alias(LocA, LocB, AAQI);
if (Result != MayAlias)
return Result;
}
@@ -110,8 +119,14 @@ AliasResult AAResults::alias(const MemoryLocation &LocA,
bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc,
bool OrLocal) {
+ AAQueryInfo AAQIP;
+ return pointsToConstantMemory(Loc, AAQIP, OrLocal);
+}
+
+bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc,
+ AAQueryInfo &AAQI, bool OrLocal) {
for (const auto &AA : AAs)
- if (AA->pointsToConstantMemory(Loc, OrLocal))
+ if (AA->pointsToConstantMemory(Loc, AAQI, OrLocal))
return true;
return false;
@@ -132,10 +147,16 @@ ModRefInfo AAResults::getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
}
ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(I, Call2, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2,
+ AAQueryInfo &AAQI) {
// We may have two calls.
if (const auto *Call1 = dyn_cast<CallBase>(I)) {
// Check if the two calls modify the same memory.
- return getModRefInfo(Call1, Call2);
+ return getModRefInfo(Call1, Call2, AAQI);
} else if (I->isFenceLike()) {
// If this is a fence, just return ModRef.
return ModRefInfo::ModRef;
@@ -145,7 +166,7 @@ ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2) {
// is that if the call references what this instruction
// defines, it must be clobbered by this location.
const MemoryLocation DefLoc = MemoryLocation::get(I);
- ModRefInfo MR = getModRefInfo(Call2, DefLoc);
+ ModRefInfo MR = getModRefInfo(Call2, DefLoc, AAQI);
if (isModOrRefSet(MR))
return setModAndRef(MR);
}
@@ -154,10 +175,17 @@ ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2) {
ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(Call, Loc, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
ModRefInfo Result = ModRefInfo::ModRef;
for (const auto &AA : AAs) {
- Result = intersectModRef(Result, AA->getModRefInfo(Call, Loc));
+ Result = intersectModRef(Result, AA->getModRefInfo(Call, Loc, AAQI));
// Early-exit the moment we reach the bottom of the lattice.
if (isNoModRef(Result))
@@ -215,10 +243,16 @@ ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
ModRefInfo AAResults::getModRefInfo(const CallBase *Call1,
const CallBase *Call2) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(Call1, Call2, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const CallBase *Call1,
+ const CallBase *Call2, AAQueryInfo &AAQI) {
ModRefInfo Result = ModRefInfo::ModRef;
for (const auto &AA : AAs) {
- Result = intersectModRef(Result, AA->getModRefInfo(Call1, Call2));
+ Result = intersectModRef(Result, AA->getModRefInfo(Call1, Call2, AAQI));
// Early-exit the moment we reach the bottom of the lattice.
if (isNoModRef(Result))
@@ -397,6 +431,12 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, AliasResult AR) {
ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
const MemoryLocation &Loc) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(L, Loc, AAQIP);
+}
+ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
// Be conservative in the face of atomic.
if (isStrongerThan(L->getOrdering(), AtomicOrdering::Unordered))
return ModRefInfo::ModRef;
@@ -404,7 +444,7 @@ ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
// If the load address doesn't alias the given address, it doesn't read
// or write the specified memory.
if (Loc.Ptr) {
- AliasResult AR = alias(MemoryLocation::get(L), Loc);
+ AliasResult AR = alias(MemoryLocation::get(L), Loc, AAQI);
if (AR == NoAlias)
return ModRefInfo::NoModRef;
if (AR == MustAlias)
@@ -416,12 +456,18 @@ ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
const MemoryLocation &Loc) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(S, Loc, AAQIP);
+}
+ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
// Be conservative in the face of atomic.
if (isStrongerThan(S->getOrdering(), AtomicOrdering::Unordered))
return ModRefInfo::ModRef;
if (Loc.Ptr) {
- AliasResult AR = alias(MemoryLocation::get(S), Loc);
+ AliasResult AR = alias(MemoryLocation::get(S), Loc, AAQI);
// If the store address cannot alias the pointer in question, then the
// specified memory cannot be modified by the store.
if (AR == NoAlias)
@@ -429,7 +475,7 @@ ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
// If the pointer is a pointer to constant memory, then it could not have
// been modified by this store.
- if (pointsToConstantMemory(Loc))
+ if (pointsToConstantMemory(Loc, AAQI))
return ModRefInfo::NoModRef;
// If the store address aliases the pointer as must alias, set Must.
@@ -442,17 +488,31 @@ ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
}
ModRefInfo AAResults::getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(S, Loc, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const FenceInst *S,
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
// If we know that the location is a constant memory location, the fence
// cannot modify this location.
- if (Loc.Ptr && pointsToConstantMemory(Loc))
+ if (Loc.Ptr && pointsToConstantMemory(Loc, AAQI))
return ModRefInfo::Ref;
return ModRefInfo::ModRef;
}
ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
const MemoryLocation &Loc) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(V, Loc, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
if (Loc.Ptr) {
- AliasResult AR = alias(MemoryLocation::get(V), Loc);
+ AliasResult AR = alias(MemoryLocation::get(V), Loc, AAQI);
// If the va_arg address cannot alias the pointer in question, then the
// specified memory cannot be accessed by the va_arg.
if (AR == NoAlias)
@@ -460,7 +520,7 @@ ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
// If the pointer is a pointer to constant memory, then it could not have
// been modified by this va_arg.
- if (pointsToConstantMemory(Loc))
+ if (pointsToConstantMemory(Loc, AAQI))
return ModRefInfo::NoModRef;
// If the va_arg aliases the pointer as must alias, set Must.
@@ -474,10 +534,17 @@ ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad,
const MemoryLocation &Loc) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(CatchPad, Loc, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad,
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
if (Loc.Ptr) {
// If the pointer is a pointer to constant memory,
// then it could not have been modified by this catchpad.
- if (pointsToConstantMemory(Loc))
+ if (pointsToConstantMemory(Loc, AAQI))
return ModRefInfo::NoModRef;
}
@@ -487,10 +554,17 @@ ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad,
ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet,
const MemoryLocation &Loc) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(CatchRet, Loc, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet,
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
if (Loc.Ptr) {
// If the pointer is a pointer to constant memory,
// then it could not have been modified by this catchpad.
- if (pointsToConstantMemory(Loc))
+ if (pointsToConstantMemory(Loc, AAQI))
return ModRefInfo::NoModRef;
}
@@ -500,12 +574,19 @@ ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet,
ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX,
const MemoryLocation &Loc) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(CX, Loc, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX,
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
// Acquire/Release cmpxchg has properties that matter for arbitrary addresses.
if (isStrongerThanMonotonic(CX->getSuccessOrdering()))
return ModRefInfo::ModRef;
if (Loc.Ptr) {
- AliasResult AR = alias(MemoryLocation::get(CX), Loc);
+ AliasResult AR = alias(MemoryLocation::get(CX), Loc, AAQI);
// If the cmpxchg address does not alias the location, it does not access
// it.
if (AR == NoAlias)
@@ -521,12 +602,19 @@ ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX,
ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW,
const MemoryLocation &Loc) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(RMW, Loc, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW,
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
// Acquire/Release atomicrmw has properties that matter for arbitrary addresses.
if (isStrongerThanMonotonic(RMW->getOrdering()))
return ModRefInfo::ModRef;
if (Loc.Ptr) {
- AliasResult AR = alias(MemoryLocation::get(RMW), Loc);
+ AliasResult AR = alias(MemoryLocation::get(RMW), Loc, AAQI);
// If the atomicrmw address does not alias the location, it does not access
// it.
if (AR == NoAlias)
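
The AliasAnalysis.cpp hunks above all apply one pattern: each public AAResults entry point now constructs a stack-local AAQueryInfo and forwards to a new overload that threads it through every nested query, so per-query caches live for exactly one top-level query and are shared by all of its recursive sub-queries. A minimal sketch of that wrapper-plus-overload shape, using hypothetical stand-in types (QueryInfo, Loc, Result) rather than the LLVM ones:

  #include <map>
  #include <utility>

  enum Result { NoAlias, MayAlias, MustAlias };
  using Loc = const void *;

  struct QueryInfo {
    // Shared by every recursive sub-query of one top-level query.
    std::map<std::pair<Loc, Loc>, Result> Cache;
  };

  struct Analysis {
    // Public entry point: each external query gets fresh, private state.
    Result alias(Loc A, Loc B) {
      QueryInfo QI;
      return alias(A, B, QI);
    }

    // Internal overload: recursion re-enters here, reusing the same QI.
    Result alias(Loc A, Loc B, QueryInfo &QI) {
      auto It = QI.Cache.find({A, B});
      if (It != QI.Cache.end())
        return It->second; // answered earlier in this same query
      Result R = MayAlias; // ... real analysis work would go here ...
      QI.Cache.emplace(std::make_pair(A, B), R);
      return R;
    }
  };
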
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 85dd4fe95b33..e83703867e09 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -1,9 +1,8 @@
//===- AliasAnalysisEvaluator.cpp - Alias Analysis Accuracy Evaluator -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/AliasAnalysisSummary.cpp b/lib/Analysis/AliasAnalysisSummary.cpp
index 2b4879453beb..2f3396a44117 100644
--- a/lib/Analysis/AliasAnalysisSummary.cpp
+++ b/lib/Analysis/AliasAnalysisSummary.cpp
@@ -73,28 +73,28 @@ AliasAttrs getExternallyVisibleAttrs(AliasAttrs Attr) {
}
Optional<InstantiatedValue> instantiateInterfaceValue(InterfaceValue IValue,
- CallSite CS) {
+ CallBase &Call) {
auto Index = IValue.Index;
- auto Value = (Index == 0) ? CS.getInstruction() : CS.getArgument(Index - 1);
- if (Value->getType()->isPointerTy())
- return InstantiatedValue{Value, IValue.DerefLevel};
+ auto *V = (Index == 0) ? &Call : Call.getArgOperand(Index - 1);
+ if (V->getType()->isPointerTy())
+ return InstantiatedValue{V, IValue.DerefLevel};
return None;
}
Optional<InstantiatedRelation>
-instantiateExternalRelation(ExternalRelation ERelation, CallSite CS) {
- auto From = instantiateInterfaceValue(ERelation.From, CS);
+instantiateExternalRelation(ExternalRelation ERelation, CallBase &Call) {
+ auto From = instantiateInterfaceValue(ERelation.From, Call);
if (!From)
return None;
- auto To = instantiateInterfaceValue(ERelation.To, CS);
+ auto To = instantiateInterfaceValue(ERelation.To, Call);
if (!To)
return None;
return InstantiatedRelation{*From, *To, ERelation.Offset};
}
Optional<InstantiatedAttr> instantiateExternalAttribute(ExternalAttribute EAttr,
- CallSite CS) {
- auto Value = instantiateInterfaceValue(EAttr.IValue, CS);
+ CallBase &Call) {
+ auto Value = instantiateInterfaceValue(EAttr.IValue, Call);
if (!Value)
return None;
return InstantiatedAttr{*Value, EAttr.Attr};
diff --git a/lib/Analysis/AliasAnalysisSummary.h b/lib/Analysis/AliasAnalysisSummary.h
index fb93a12420f8..fe75b03cedef 100644
--- a/lib/Analysis/AliasAnalysisSummary.h
+++ b/lib/Analysis/AliasAnalysisSummary.h
@@ -1,9 +1,8 @@
//=====- CFLSummary.h - Abstract stratified sets implementation. --------=====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -38,7 +37,7 @@
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstrTypes.h"
#include <bitset>
namespace llvm {
@@ -196,12 +195,13 @@ struct AliasSummary {
SmallVector<ExternalAttribute, 8> RetParamAttributes;
};
-/// This is the result of instantiating InterfaceValue at a particular callsite
+/// This is the result of instantiating InterfaceValue at a particular call
struct InstantiatedValue {
Value *Val;
unsigned DerefLevel;
};
-Optional<InstantiatedValue> instantiateInterfaceValue(InterfaceValue, CallSite);
+Optional<InstantiatedValue> instantiateInterfaceValue(InterfaceValue IValue,
+ CallBase &Call);
inline bool operator==(InstantiatedValue LHS, InstantiatedValue RHS) {
return LHS.Val == RHS.Val && LHS.DerefLevel == RHS.DerefLevel;
@@ -229,8 +229,8 @@ struct InstantiatedRelation {
InstantiatedValue From, To;
int64_t Offset;
};
-Optional<InstantiatedRelation> instantiateExternalRelation(ExternalRelation,
- CallSite);
+Optional<InstantiatedRelation>
+instantiateExternalRelation(ExternalRelation ERelation, CallBase &Call);
/// This is the result of instantiating ExternalAttribute at a particular
/// callsite
@@ -238,8 +238,8 @@ struct InstantiatedAttr {
InstantiatedValue IValue;
AliasAttrs Attr;
};
-Optional<InstantiatedAttr> instantiateExternalAttribute(ExternalAttribute,
- CallSite);
+Optional<InstantiatedAttr> instantiateExternalAttribute(ExternalAttribute EAttr,
+ CallBase &Call);
}
template <> struct DenseMapInfo<cflaa::InstantiatedValue> {
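
The two AliasAnalysisSummary files migrate from the CallSite wrapper to CallBase, replacing CS.getInstruction() with the call itself and CS.getArgument(I) with Call.getArgOperand(I). A sketch of the index convention used by instantiateInterfaceValue, against the LLVM API of this era (CallBase lives in llvm/IR/InstrTypes.h, as the include swap above shows); the function name is illustrative:

  #include "llvm/IR/InstrTypes.h"
  using namespace llvm;

  // Index 0 denotes the call's own return value; index N denotes the
  // (N-1)th argument, mirroring instantiateInterfaceValue above.
  Value *nthInterfaceValue(CallBase &Call, unsigned Index) {
    return (Index == 0) ? &Call : Call.getArgOperand(Index - 1);
  }
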
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index f6ad704cc914..a6e5b9fab558 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -1,9 +1,8 @@
//===- AliasSetTracker.cpp - Alias Sets Tracker implementation-------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,7 +13,9 @@
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/GuardUtils.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -127,24 +128,24 @@ void AliasSet::removeFromTracker(AliasSetTracker &AST) {
void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
LocationSize Size, const AAMDNodes &AAInfo,
- bool KnownMustAlias) {
+ bool KnownMustAlias, bool SkipSizeUpdate) {
assert(!Entry.hasAliasSet() && "Entry already in set!");
// Check to see if we have to downgrade to _may_ alias.
- if (isMustAlias() && !KnownMustAlias)
+ if (isMustAlias())
if (PointerRec *P = getSomePointer()) {
- AliasAnalysis &AA = AST.getAliasAnalysis();
- AliasResult Result =
- AA.alias(MemoryLocation(P->getValue(), P->getSize(), P->getAAInfo()),
- MemoryLocation(Entry.getValue(), Size, AAInfo));
- if (Result != MustAlias) {
- Alias = SetMayAlias;
- AST.TotalMayAliasSetSize += size();
- } else {
- // First entry of must alias must have maximum size!
+ if (!KnownMustAlias) {
+ AliasAnalysis &AA = AST.getAliasAnalysis();
+ AliasResult Result = AA.alias(
+ MemoryLocation(P->getValue(), P->getSize(), P->getAAInfo()),
+ MemoryLocation(Entry.getValue(), Size, AAInfo));
+ if (Result != MustAlias) {
+ Alias = SetMayAlias;
+ AST.TotalMayAliasSetSize += size();
+ }
+ assert(Result != NoAlias && "Cannot be part of must set!");
+ } else if (!SkipSizeUpdate)
P->updateSizeAndAAInfo(Size, AAInfo);
- }
- assert(Result != NoAlias && "Cannot be part of must set!");
}
Entry.setAliasSet(this);
@@ -184,14 +185,15 @@ void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
Access = ModRefAccess;
}
-/// aliasesPointer - Return true if the specified pointer "may" (or must)
-/// alias one of the members in the set.
+/// aliasesPointer - If the specified pointer "may" (or must) alias one of the
+/// members in the set return the appropriate AliasResult. Otherwise return
+/// NoAlias.
///
-bool AliasSet::aliasesPointer(const Value *Ptr, LocationSize Size,
- const AAMDNodes &AAInfo,
- AliasAnalysis &AA) const {
+AliasResult AliasSet::aliasesPointer(const Value *Ptr, LocationSize Size,
+ const AAMDNodes &AAInfo,
+ AliasAnalysis &AA) const {
if (AliasAny)
- return true;
+ return MayAlias;
if (Alias == SetMustAlias) {
assert(UnknownInsts.empty() && "Illegal must alias set!");
@@ -208,9 +210,10 @@ bool AliasSet::aliasesPointer(const Value *Ptr, LocationSize Size,
// If this is a may-alias set, we have to check all of the pointers in the set
// to be sure it doesn't alias the set...
for (iterator I = begin(), E = end(); I != E; ++I)
- if (AA.alias(MemoryLocation(Ptr, Size, AAInfo),
- MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo())))
- return true;
+ if (AliasResult AR = AA.alias(
+ MemoryLocation(Ptr, Size, AAInfo),
+ MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo())))
+ return AR;
// Check the unknown instructions...
if (!UnknownInsts.empty()) {
@@ -218,10 +221,10 @@ bool AliasSet::aliasesPointer(const Value *Ptr, LocationSize Size,
if (auto *Inst = getUnknownInst(i))
if (isModOrRefSet(
AA.getModRefInfo(Inst, MemoryLocation(Ptr, Size, AAInfo))))
- return true;
+ return MayAlias;
}
- return false;
+ return NoAlias;
}
bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
@@ -288,25 +291,38 @@ void AliasSetTracker::clear() {
AliasSets.clear();
}
-
/// mergeAliasSetsForPointer - Given a pointer, merge all alias sets that may
/// alias the pointer. Return the unified set, or nullptr if no set that aliases
-/// the pointer was found.
+/// the pointer was found. MustAliasAll is updated to true/false if the pointer
+/// is found to MustAlias all the sets it merged.
AliasSet *AliasSetTracker::mergeAliasSetsForPointer(const Value *Ptr,
LocationSize Size,
- const AAMDNodes &AAInfo) {
+ const AAMDNodes &AAInfo,
+ bool &MustAliasAll) {
AliasSet *FoundSet = nullptr;
+ AliasResult AllAR = MustAlias;
for (iterator I = begin(), E = end(); I != E;) {
iterator Cur = I++;
- if (Cur->Forward || !Cur->aliasesPointer(Ptr, Size, AAInfo, AA)) continue;
+ if (Cur->Forward)
+ continue;
+
+ AliasResult AR = Cur->aliasesPointer(Ptr, Size, AAInfo, AA);
+ if (AR == NoAlias)
+ continue;
+
+ AllAR =
+ AliasResult(AllAR & AR); // Possible downgrade to May/Partial, even No
- if (!FoundSet) { // If this is the first alias set ptr can go into.
- FoundSet = &*Cur; // Remember it.
- } else { // Otherwise, we must merge the sets.
- FoundSet->mergeSetIn(*Cur, *this); // Merge in contents.
+ if (!FoundSet) {
+ // If this is the first alias set ptr can go into, remember it.
+ FoundSet = &*Cur;
+ } else {
+ // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*Cur, *this);
}
}
+ MustAliasAll = (AllAR == MustAlias);
return FoundSet;
}
@@ -316,10 +332,13 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
iterator Cur = I++;
if (Cur->Forward || !Cur->aliasesUnknownInst(Inst, AA))
continue;
- if (!FoundSet) // If this is the first alias set ptr can go into.
- FoundSet = &*Cur; // Remember it.
- else // Otherwise, we must merge the sets.
- FoundSet->mergeSetIn(*Cur, *this); // Merge in contents.
+ if (!FoundSet) {
+ // If this is the first alias set ptr can go into, remember it.
+ FoundSet = &*Cur;
+ } else {
+ // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*Cur, *this);
+ }
}
return FoundSet;
}
@@ -329,7 +348,7 @@ AliasSet &AliasSetTracker::getAliasSetFor(const MemoryLocation &MemLoc) {
Value * const Pointer = const_cast<Value*>(MemLoc.Ptr);
const LocationSize Size = MemLoc.Size;
const AAMDNodes &AAInfo = MemLoc.AATags;
-
+
AliasSet::PointerRec &Entry = getEntryFor(Pointer);
if (AliasAnyAS) {
@@ -348,6 +367,7 @@ AliasSet &AliasSetTracker::getAliasSetFor(const MemoryLocation &MemLoc) {
return *AliasAnyAS;
}
+ bool MustAliasAll = false;
// Check to see if the pointer is already known.
if (Entry.hasAliasSet()) {
// If the size changed, we may need to merge several alias sets.
@@ -356,20 +376,21 @@ AliasSet &AliasSetTracker::getAliasSetFor(const MemoryLocation &MemLoc) {
// is NoAlias, mergeAliasSetsForPointer(undef, ...) will not find the
// the right set for undef, even if it exists.
if (Entry.updateSizeAndAAInfo(Size, AAInfo))
- mergeAliasSetsForPointer(Pointer, Size, AAInfo);
+ mergeAliasSetsForPointer(Pointer, Size, AAInfo, MustAliasAll);
// Return the set!
return *Entry.getAliasSet(*this)->getForwardedTarget(*this);
}
- if (AliasSet *AS = mergeAliasSetsForPointer(Pointer, Size, AAInfo)) {
+ if (AliasSet *AS =
+ mergeAliasSetsForPointer(Pointer, Size, AAInfo, MustAliasAll)) {
// Add it to the alias set it aliases.
- AS->addPointer(*this, Entry, Size, AAInfo);
+ AS->addPointer(*this, Entry, Size, AAInfo, MustAliasAll);
return *AS;
}
// Otherwise create a new alias set to hold the loaded pointer.
AliasSets.push_back(new AliasSet());
- AliasSets.back().addPointer(*this, Entry, Size, AAInfo);
+ AliasSets.back().addPointer(*this, Entry, Size, AAInfo, true);
return AliasSets.back();
}
@@ -422,14 +443,12 @@ void AliasSetTracker::addUnknown(Instruction *Inst) {
if (!Inst->mayReadOrWriteMemory())
return; // doesn't alias anything
- AliasSet *AS = findAliasSetForUnknownInst(Inst);
- if (AS) {
+ if (AliasSet *AS = findAliasSetForUnknownInst(Inst)) {
AS->addUnknownInst(Inst, AA);
return;
}
AliasSets.push_back(new AliasSet());
- AS = &AliasSets.back();
- AS->addUnknownInst(Inst, AA);
+ AliasSets.back().addUnknownInst(Inst, AA);
}
void AliasSetTracker::add(Instruction *I) {
@@ -516,6 +535,15 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
}
}
+void AliasSetTracker::addAllInstructionsInLoopUsingMSSA() {
+ assert(MSSA && L && "MSSA and L must be available");
+ for (const BasicBlock *BB : L->blocks())
+ if (auto *Accesses = MSSA->getBlockAccesses(BB))
+ for (auto &Access : *Accesses)
+ if (auto *MUD = dyn_cast<MemoryUseOrDef>(&Access))
+ add(MUD->getMemoryInst());
+}
+
// deleteValue method - This method is used to remove a pointer value from the
// AliasSetTracker entirely. It should be used when an instruction is deleted
// from the program to update the AST. If you don't use this, you would have
@@ -563,9 +591,8 @@ void AliasSetTracker::copyValue(Value *From, Value *To) {
I = PointerMap.find_as(From);
// Add it to the alias set it aliases...
AliasSet *AS = I->second->getAliasSet(*this);
- AS->addPointer(*this, Entry, I->second->getSize(),
- I->second->getAAInfo(),
- true);
+ AS->addPointer(*this, Entry, I->second->getSize(), I->second->getAAInfo(),
+ true, true);
}
AliasSet &AliasSetTracker::mergeAllAliasSets() {
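
Two things change in AliasSetTracker.cpp: aliasesPointer now reports the AliasResult itself instead of a bool, and mergeAliasSetsForPointer folds every merged set's answer with a bitwise AND so callers learn whether the pointer MustAliases everything it merged (the new addAllInstructionsInLoopUsingMSSA walk feeds the same machinery from MemorySSA block accesses instead of visiting every instruction). The AND works because of the enum encoding of this era, NoAlias = 0, MayAlias = 1, PartialAlias = 2, MustAlias = 3: ANDing only ever moves down the lattice, and MayAlias & PartialAlias even bottoms out at NoAlias, which is what the "even No" comment above is pointing at. A self-contained restatement, as a sketch rather than the LLVM definitions:

  enum AliasResult { NoAlias = 0, MayAlias = 1, PartialAlias = 2, MustAlias = 3 };

  // Meet-over-results trick from mergeAliasSetsForPointer: MustAlias & X == X,
  // while MayAlias & PartialAlias == NoAlias.
  AliasResult meet(AliasResult A, AliasResult B) {
    return AliasResult(A & B);
  }
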
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index bb8742123a0f..d46a8d8e306c 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -1,9 +1,8 @@
//===-- Analysis.cpp ------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp
index 8bfd24ccf77b..cf2f845dee0a 100644
--- a/lib/Analysis/AssumptionCache.cpp
+++ b/lib/Analysis/AssumptionCache.cpp
@@ -1,9 +1,8 @@
//===- AssumptionCache.cpp - Cache finding @llvm.assume calls -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -54,11 +53,11 @@ AssumptionCache::getOrInsertAffectedValues(Value *V) {
return AVIP.first->second;
}
-void AssumptionCache::updateAffectedValues(CallInst *CI) {
+static void findAffectedValues(CallInst *CI,
+ SmallVectorImpl<Value *> &Affected) {
// Note: This code must be kept in-sync with the code in
// computeKnownBitsFromAssume in ValueTracking.
- SmallVector<Value *, 16> Affected;
auto AddAffected = [&Affected](Value *V) {
if (isa<Argument>(V)) {
Affected.push_back(V);
@@ -109,6 +108,11 @@ void AssumptionCache::updateAffectedValues(CallInst *CI) {
AddAffectedFromEq(B);
}
}
+}
+
+void AssumptionCache::updateAffectedValues(CallInst *CI) {
+ SmallVector<Value *, 16> Affected;
+ findAffectedValues(CI, Affected);
for (auto &AV : Affected) {
auto &AVV = getOrInsertAffectedValues(AV);
@@ -117,6 +121,18 @@ void AssumptionCache::updateAffectedValues(CallInst *CI) {
}
}
+void AssumptionCache::unregisterAssumption(CallInst *CI) {
+ SmallVector<Value *, 16> Affected;
+ findAffectedValues(CI, Affected);
+
+ for (auto &AV : Affected) {
+ auto AVI = AffectedValues.find_as(AV);
+ if (AVI != AffectedValues.end())
+ AffectedValues.erase(AVI);
+ }
+ remove_if(AssumeHandles, [CI](WeakTrackingVH &VH) { return CI == VH; });
+}
+
void AssumptionCache::AffectedValueCallbackVH::deleted() {
auto AVI = AC->AffectedValues.find(getValPtr());
if (AVI != AC->AffectedValues.end())
@@ -241,6 +257,13 @@ AssumptionCache &AssumptionCacheTracker::getAssumptionCache(Function &F) {
return *IP.first->second;
}
+AssumptionCache *AssumptionCacheTracker::lookupAssumptionCache(Function &F) {
+ auto I = AssumptionCaches.find_as(&F);
+ if (I != AssumptionCaches.end())
+ return I->second.get();
+ return nullptr;
+}
+
void AssumptionCacheTracker::verifyAnalysis() const {
// FIXME: In the long term the verifier should not be controllable with a
// flag. We should either fix all passes to correctly update the assumption
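
The AssumptionCache change hoists the affected-value walk out of updateAffectedValues into a static findAffectedValues so that the new unregisterAssumption can run the identical walk and erase exactly what registration inserted. A minimal sketch of that extract-and-share shape, with illustrative names (Registry, findKeys) that are not the LLVM ones:

  #include <algorithm>
  #include <map>
  #include <vector>

  using Key = int;

  // Shared helper: both paths below must agree on which keys an
  // assumption affects, so the walk lives in one place.
  static void findKeys(int Assumption, std::vector<Key> &Keys) {
    Keys.push_back(Assumption % 7); // stand-in for the real pattern match
  }

  struct Registry {
    std::map<Key, std::vector<int>> Affected;
    std::vector<int> Handles;

    void registerAssumption(int A) {
      std::vector<Key> Keys;
      findKeys(A, Keys);
      for (Key K : Keys)
        Affected[K].push_back(A);
      Handles.push_back(A);
    }

    void unregisterAssumption(int A) {
      std::vector<Key> Keys;
      findKeys(A, Keys); // same walk keeps both paths in sync
      for (Key K : Keys)
        Affected.erase(K);
      Handles.erase(std::remove(Handles.begin(), Handles.end(), A),
                    Handles.end());
    }
  };
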
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 332eeaa00e73..3721c99883b8 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1,9 +1,8 @@
//===- BasicAliasAnalysis.cpp - Stateless Alias Analysis Impl -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -117,25 +116,44 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
/// Returns true if the pointer is to a function-local object that never
/// escapes from the function.
-static bool isNonEscapingLocalObject(const Value *V) {
+static bool isNonEscapingLocalObject(
+ const Value *V,
+ SmallDenseMap<const Value *, bool, 8> *IsCapturedCache = nullptr) {
+ SmallDenseMap<const Value *, bool, 8>::iterator CacheIt;
+ if (IsCapturedCache) {
+ bool Inserted;
+ std::tie(CacheIt, Inserted) = IsCapturedCache->insert({V, false});
+ if (!Inserted)
+ // Found cached result, return it!
+ return CacheIt->second;
+ }
+
// If this is a local allocation, check to see if it escapes.
- if (isa<AllocaInst>(V) || isNoAliasCall(V))
+ if (isa<AllocaInst>(V) || isNoAliasCall(V)) {
// Set StoreCaptures to True so that we can assume in our callers that the
// pointer is not the result of a load instruction. Currently
// PointerMayBeCaptured doesn't have any special analysis for the
// StoreCaptures=false case; if it did, our callers could be refined to be
// more precise.
- return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+ auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+ if (IsCapturedCache)
+ CacheIt->second = Ret;
+ return Ret;
+ }
// If this is an argument that corresponds to a byval or noalias argument,
// then it has not escaped before entering the function. Check if it escapes
// inside the function.
if (const Argument *A = dyn_cast<Argument>(V))
- if (A->hasByValAttr() || A->hasNoAliasAttr())
+ if (A->hasByValAttr() || A->hasNoAliasAttr()) {
// Note even if the argument is marked nocapture, we still need to check
// for copies made inside the function. The nocapture attribute only
// specifies that there are no copies made that outlive the function.
- return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+ auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+ if (IsCapturedCache)
+ CacheIt->second = Ret;
+ return Ret;
+ }
return false;
}
@@ -613,7 +631,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
/// the function, with global constants being considered local to all
/// functions.
bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) {
+ AAQueryInfo &AAQI, bool OrLocal) {
assert(Visited.empty() && "Visited must be cleared after use!");
unsigned MaxLookup = 8;
@@ -623,7 +641,7 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL);
if (!Visited.insert(V).second) {
Visited.clear();
- return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
}
// An alloca instruction defines local memory.
@@ -637,7 +655,7 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
// others. GV may even be a declaration, not a definition.
if (!GV->isConstant()) {
Visited.clear();
- return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
}
continue;
}
@@ -655,7 +673,7 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
// Don't bother inspecting phi nodes with many operands.
if (PN->getNumIncomingValues() > MaxLookup) {
Visited.clear();
- return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
}
for (Value *IncValue : PN->incoming_values())
Worklist.push_back(IncValue);
@@ -664,7 +682,7 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
// Otherwise be conservative.
Visited.clear();
- return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
} while (!Worklist.empty() && --MaxLookup);
Visited.clear();
@@ -799,24 +817,25 @@ static bool notDifferentParent(const Value *O1, const Value *O2) {
#endif
AliasResult BasicAAResult::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+ const MemoryLocation &LocB,
+ AAQueryInfo &AAQI) {
assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
"BasicAliasAnalysis doesn't support interprocedural queries.");
// If we have a directly cached entry for these locations, we have recursed
// through this once, so just return the cached results. Notably, when this
// happens, we don't clear the cache.
- auto CacheIt = AliasCache.find(LocPair(LocA, LocB));
- if (CacheIt != AliasCache.end())
+ auto CacheIt = AAQI.AliasCache.find(AAQueryInfo::LocPair(LocA, LocB));
+ if (CacheIt != AAQI.AliasCache.end())
+ return CacheIt->second;
+
+ CacheIt = AAQI.AliasCache.find(AAQueryInfo::LocPair(LocB, LocA));
+ if (CacheIt != AAQI.AliasCache.end())
return CacheIt->second;
AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags, LocB.Ptr,
- LocB.Size, LocB.AATags);
- // AliasCache rarely has more than 1 or 2 elements, always use
- // shrink_and_clear so it quickly returns to the inline capacity of the
- // SmallDenseMap if it ever grows larger.
- // FIXME: This should really be shrink_to_inline_capacity_and_clear().
- AliasCache.shrink_and_clear();
+ LocB.Size, LocB.AATags, AAQI);
+
VisitedPhiBBs.clear();
return Alias;
}
@@ -828,7 +847,8 @@ AliasResult BasicAAResult::alias(const MemoryLocation &LocA,
/// say much about this query. We do, however, use simple "address taken"
/// analysis on local objects.
ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
- const MemoryLocation &Loc) {
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
assert(notDifferentParent(Call, Loc.Ptr) &&
"AliasAnalysis query involving multiple functions!");
@@ -855,7 +875,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
// then the call can not mod/ref the pointer unless the call takes the pointer
// as an argument, and itself doesn't capture it.
if (!isa<Constant>(Object) && Call != Object &&
- isNonEscapingLocalObject(Object)) {
+ isNonEscapingLocalObject(Object, &AAQI.IsCapturedCache)) {
// Optimistically assume that call doesn't touch Object and check this
// assumption in the following loop.
@@ -881,11 +901,11 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
// If this is a no-capture pointer argument, see if we can tell that it
// is impossible to alias the pointer we're checking.
- AliasResult AR =
- getBestAAResults().alias(MemoryLocation(*CI), MemoryLocation(Object));
+ AliasResult AR = getBestAAResults().alias(MemoryLocation(*CI),
+ MemoryLocation(Object), AAQI);
if (AR != MustAlias)
IsMustAlias = false;
- // Operand doesnt alias 'Object', continue looking for other aliases
+ // Operand doesn't alias 'Object', continue looking for other aliases
if (AR == NoAlias)
continue;
// Operand aliases 'Object', but call doesn't modify it. Strengthen
@@ -928,7 +948,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
if (isMallocOrCallocLikeFn(Call, &TLI)) {
// Be conservative if the accessed pointer may alias the allocation -
// fallback to the generic handling below.
- if (getBestAAResults().alias(MemoryLocation(Call), Loc) == NoAlias)
+ if (getBestAAResults().alias(MemoryLocation(Call), Loc, AAQI) == NoAlias)
return ModRefInfo::NoModRef;
}
@@ -940,11 +960,11 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
AliasResult SrcAA, DestAA;
if ((SrcAA = getBestAAResults().alias(MemoryLocation::getForSource(Inst),
- Loc)) == MustAlias)
+ Loc, AAQI)) == MustAlias)
// Loc is exactly the memcpy source thus disjoint from memcpy dest.
return ModRefInfo::Ref;
if ((DestAA = getBestAAResults().alias(MemoryLocation::getForDest(Inst),
- Loc)) == MustAlias)
+ Loc, AAQI)) == MustAlias)
// The converse case.
return ModRefInfo::Mod;
@@ -1000,11 +1020,12 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
return ModRefInfo::Ref;
// The AAResultBase base class has some smarts, lets use them.
- return AAResultBase::getModRefInfo(Call, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc, AAQI);
}
ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call1,
- const CallBase *Call2) {
+ const CallBase *Call2,
+ AAQueryInfo &AAQI) {
// While the assume intrinsic is marked as arbitrarily writing so that
// proper control dependencies will be maintained, it never aliases any
// particular memory location.
@@ -1020,7 +1041,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call1,
// heap state at the point the guard is issued needs to be consistent in case
// the guard invokes the "deopt" continuation.
- // NB! This function is *not* commutative, so we specical case two
+ // NB! This function is *not* commutative, so we special case two
// possibilities for guard intrinsics.
if (isIntrinsicCall(Call1, Intrinsic::experimental_guard))
@@ -1034,7 +1055,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call1,
: ModRefInfo::NoModRef;
// The AAResultBase base class has some smarts, lets use them.
- return AAResultBase::getModRefInfo(Call1, Call2);
+ return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
}
/// Provide ad-hoc rules to disambiguate accesses through two GEP operators,
@@ -1266,11 +1287,10 @@ bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
/// We know that V1 is a GEP, but we don't know anything about V2.
/// UnderlyingV1 is GetUnderlyingObject(GEP1, DL), UnderlyingV2 is the same for
/// V2.
-AliasResult
-BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
- const AAMDNodes &V1AAInfo, const Value *V2,
- LocationSize V2Size, const AAMDNodes &V2AAInfo,
- const Value *UnderlyingV1, const Value *UnderlyingV2) {
+AliasResult BasicAAResult::aliasGEP(
+ const GEPOperator *GEP1, LocationSize V1Size, const AAMDNodes &V1AAInfo,
+ const Value *V2, LocationSize V2Size, const AAMDNodes &V2AAInfo,
+ const Value *UnderlyingV1, const Value *UnderlyingV2, AAQueryInfo &AAQI) {
DecomposedGEP DecompGEP1, DecompGEP2;
unsigned MaxPointerSize = getMaxPointerSize(DL);
DecompGEP1.StructOffset = DecompGEP1.OtherOffset = APInt(MaxPointerSize, 0);
@@ -1306,14 +1326,14 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
// Do the base pointers alias?
AliasResult BaseAlias =
aliasCheck(UnderlyingV1, LocationSize::unknown(), AAMDNodes(),
- UnderlyingV2, LocationSize::unknown(), AAMDNodes());
+ UnderlyingV2, LocationSize::unknown(), AAMDNodes(), AAQI);
// Check for geps of non-aliasing underlying pointers where the offsets are
// identical.
if ((BaseAlias == MayAlias) && V1Size == V2Size) {
// Do the base pointers alias assuming type and size.
- AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, V1AAInfo,
- UnderlyingV2, V2Size, V2AAInfo);
+ AliasResult PreciseBaseAlias = aliasCheck(
+ UnderlyingV1, V1Size, V1AAInfo, UnderlyingV2, V2Size, V2AAInfo, AAQI);
if (PreciseBaseAlias == NoAlias) {
// See if the computed offset from the common pointer tells us about the
// relation of the resulting pointer.
@@ -1368,9 +1388,9 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
if (V1Size == LocationSize::unknown() && V2Size == LocationSize::unknown())
return MayAlias;
- AliasResult R =
- aliasCheck(UnderlyingV1, LocationSize::unknown(), AAMDNodes(), V2,
- LocationSize::unknown(), V2AAInfo, nullptr, UnderlyingV2);
+ AliasResult R = aliasCheck(UnderlyingV1, LocationSize::unknown(),
+ AAMDNodes(), V2, LocationSize::unknown(),
+ V2AAInfo, AAQI, nullptr, UnderlyingV2);
if (R != MustAlias) {
// If V2 may alias GEP base pointer, conservatively returns MayAlias.
// If V2 is known not to alias GEP base pointer, then the two values
@@ -1504,37 +1524,35 @@ static AliasResult MergeAliasResults(AliasResult A, AliasResult B) {
/// Provides a bunch of ad-hoc rules to disambiguate a Select instruction
/// against another.
-AliasResult BasicAAResult::aliasSelect(const SelectInst *SI,
- LocationSize SISize,
- const AAMDNodes &SIAAInfo,
- const Value *V2, LocationSize V2Size,
- const AAMDNodes &V2AAInfo,
- const Value *UnderV2) {
+AliasResult
+BasicAAResult::aliasSelect(const SelectInst *SI, LocationSize SISize,
+ const AAMDNodes &SIAAInfo, const Value *V2,
+ LocationSize V2Size, const AAMDNodes &V2AAInfo,
+ const Value *UnderV2, AAQueryInfo &AAQI) {
// If the values are Selects with the same condition, we can do a more precise
// check: just check for aliases between the values on corresponding arms.
if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
if (SI->getCondition() == SI2->getCondition()) {
- AliasResult Alias = aliasCheck(SI->getTrueValue(), SISize, SIAAInfo,
- SI2->getTrueValue(), V2Size, V2AAInfo);
+ AliasResult Alias =
+ aliasCheck(SI->getTrueValue(), SISize, SIAAInfo, SI2->getTrueValue(),
+ V2Size, V2AAInfo, AAQI);
if (Alias == MayAlias)
return MayAlias;
AliasResult ThisAlias =
aliasCheck(SI->getFalseValue(), SISize, SIAAInfo,
- SI2->getFalseValue(), V2Size, V2AAInfo);
+ SI2->getFalseValue(), V2Size, V2AAInfo, AAQI);
return MergeAliasResults(ThisAlias, Alias);
}
// If both arms of the Select node NoAlias or MustAlias V2, then returns
// NoAlias / MustAlias. Otherwise, returns MayAlias.
- AliasResult Alias =
- aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(),
- SISize, SIAAInfo, UnderV2);
+ AliasResult Alias = aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(),
+ SISize, SIAAInfo, AAQI, UnderV2);
if (Alias == MayAlias)
return MayAlias;
- AliasResult ThisAlias =
- aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo,
- UnderV2);
+ AliasResult ThisAlias = aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(),
+ SISize, SIAAInfo, AAQI, UnderV2);
return MergeAliasResults(ThisAlias, Alias);
}
@@ -1544,7 +1562,7 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
const AAMDNodes &PNAAInfo, const Value *V2,
LocationSize V2Size,
const AAMDNodes &V2AAInfo,
- const Value *UnderV2) {
+ const Value *UnderV2, AAQueryInfo &AAQI) {
// Track phi nodes we have visited. We use this information when we determine
// value equivalence.
VisitedPhiBBs.insert(PN->getParent());
@@ -1554,8 +1572,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
// on corresponding edges.
if (const PHINode *PN2 = dyn_cast<PHINode>(V2))
if (PN2->getParent() == PN->getParent()) {
- LocPair Locs(MemoryLocation(PN, PNSize, PNAAInfo),
- MemoryLocation(V2, V2Size, V2AAInfo));
+ AAQueryInfo::LocPair Locs(MemoryLocation(PN, PNSize, PNAAInfo),
+ MemoryLocation(V2, V2Size, V2AAInfo));
if (PN > V2)
std::swap(Locs.first, Locs.second);
// Analyse the PHIs' inputs under the assumption that the PHIs are
@@ -1566,25 +1584,33 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
// that causes a MayAlias.
// Pretend the phis do not alias.
AliasResult Alias = NoAlias;
- assert(AliasCache.count(Locs) &&
- "There must exist an entry for the phi node");
- AliasResult OrigAliasResult = AliasCache[Locs];
- AliasCache[Locs] = NoAlias;
+ AliasResult OrigAliasResult;
+ {
+ // Limited lifetime iterator invalidated by the aliasCheck call below.
+ auto CacheIt = AAQI.AliasCache.find(Locs);
+ assert((CacheIt != AAQI.AliasCache.end()) &&
+ "There must exist an entry for the phi node");
+ OrigAliasResult = CacheIt->second;
+ CacheIt->second = NoAlias;
+ }
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
AliasResult ThisAlias =
aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo,
PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
- V2Size, V2AAInfo);
+ V2Size, V2AAInfo, AAQI);
Alias = MergeAliasResults(ThisAlias, Alias);
if (Alias == MayAlias)
break;
}
// Reset if speculation failed.
- if (Alias != NoAlias)
- AliasCache[Locs] = OrigAliasResult;
-
+ if (Alias != NoAlias) {
+ auto Pair =
+ AAQI.AliasCache.insert(std::make_pair(Locs, OrigAliasResult));
+ assert(!Pair.second && "Entry must have existed");
+ Pair.first->second = OrigAliasResult;
+ }
return Alias;
}
@@ -1658,9 +1684,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
if (isRecursive)
PNSize = LocationSize::unknown();
- AliasResult Alias =
- aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0],
- PNSize, PNAAInfo, UnderV2);
+ AliasResult Alias = aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0], PNSize,
+ PNAAInfo, AAQI, UnderV2);
// Early exit if the check of the first PHI source against V2 is MayAlias.
// Other results are not possible.
@@ -1673,7 +1698,7 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
Value *V = V1Srcs[i];
AliasResult ThisAlias =
- aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo, UnderV2);
+ aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo, AAQI, UnderV2);
Alias = MergeAliasResults(ThisAlias, Alias);
if (Alias == MayAlias)
break;
@@ -1687,7 +1712,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
AAMDNodes V1AAInfo, const Value *V2,
LocationSize V2Size, AAMDNodes V2AAInfo,
- const Value *O1, const Value *O2) {
+ AAQueryInfo &AAQI, const Value *O1,
+ const Value *O2) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are.
if (V1Size.isZero() || V2Size.isZero())
@@ -1755,9 +1781,11 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
// temporary store the nocapture argument's value in a temporary memory
// location if that memory location doesn't escape. Or it may pass a
// nocapture value to other functions as long as they don't capture it.
- if (isEscapeSource(O1) && isNonEscapingLocalObject(O2))
+ if (isEscapeSource(O1) &&
+ isNonEscapingLocalObject(O2, &AAQI.IsCapturedCache))
return NoAlias;
- if (isEscapeSource(O2) && isNonEscapingLocalObject(O1))
+ if (isEscapeSource(O2) &&
+ isNonEscapingLocalObject(O1, &AAQI.IsCapturedCache))
return NoAlias;
}
@@ -1772,12 +1800,12 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
// Check the cache before climbing up use-def chains. This also terminates
// otherwise infinitely recursive queries.
- LocPair Locs(MemoryLocation(V1, V1Size, V1AAInfo),
- MemoryLocation(V2, V2Size, V2AAInfo));
+ AAQueryInfo::LocPair Locs(MemoryLocation(V1, V1Size, V1AAInfo),
+ MemoryLocation(V2, V2Size, V2AAInfo));
if (V1 > V2)
std::swap(Locs.first, Locs.second);
- std::pair<AliasCacheTy::iterator, bool> Pair =
- AliasCache.insert(std::make_pair(Locs, MayAlias));
+ std::pair<AAQueryInfo::AliasCacheT::iterator, bool> Pair =
+ AAQI.AliasCache.try_emplace(Locs, MayAlias);
if (!Pair.second)
return Pair.first->second;
@@ -1791,9 +1819,13 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
}
if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
AliasResult Result =
- aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2);
- if (Result != MayAlias)
- return AliasCache[Locs] = Result;
+ aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2, AAQI);
+ if (Result != MayAlias) {
+ auto ItInsPair = AAQI.AliasCache.insert(std::make_pair(Locs, Result));
+ assert(!ItInsPair.second && "Entry must have existed");
+ ItInsPair.first->second = Result;
+ return Result;
+ }
}
if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
@@ -1803,10 +1835,13 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
std::swap(V1AAInfo, V2AAInfo);
}
if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
- AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo,
- V2, V2Size, V2AAInfo, O2);
- if (Result != MayAlias)
- return AliasCache[Locs] = Result;
+ AliasResult Result =
+ aliasPHI(PN, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O2, AAQI);
+ if (Result != MayAlias) {
+ Pair = AAQI.AliasCache.try_emplace(Locs, Result);
+ assert(!Pair.second && "Entry must have existed");
+ return Pair.first->second = Result;
+ }
}
if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
@@ -1817,9 +1852,12 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
}
if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
AliasResult Result =
- aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O2);
- if (Result != MayAlias)
- return AliasCache[Locs] = Result;
+ aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O2, AAQI);
+ if (Result != MayAlias) {
+ Pair = AAQI.AliasCache.try_emplace(Locs, Result);
+ assert(!Pair.second && "Entry must have existed");
+ return Pair.first->second = Result;
+ }
}
// If both pointers are pointing into the same object and one of them
@@ -1827,14 +1865,19 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
if (O1 == O2)
if (V1Size.isPrecise() && V2Size.isPrecise() &&
(isObjectSize(O1, V1Size.getValue(), DL, TLI, NullIsValidLocation) ||
- isObjectSize(O2, V2Size.getValue(), DL, TLI, NullIsValidLocation)))
- return AliasCache[Locs] = PartialAlias;
+ isObjectSize(O2, V2Size.getValue(), DL, TLI, NullIsValidLocation))) {
+ Pair = AAQI.AliasCache.try_emplace(Locs, PartialAlias);
+ assert(!Pair.second && "Entry must have existed");
+ return Pair.first->second = PartialAlias;
+ }
// Recurse back into the best AA results we have, potentially with refined
// memory locations. We have already ensured that BasicAA has a MayAlias
// cache result for these, so any recursion back into BasicAA won't loop.
- AliasResult Result = getBestAAResults().alias(Locs.first, Locs.second);
- return AliasCache[Locs] = Result;
+ AliasResult Result = getBestAAResults().alias(Locs.first, Locs.second, AAQI);
+ Pair = AAQI.AliasCache.try_emplace(Locs, Result);
+ assert(!Pair.second && "Entry must have existed");
+ return Pair.first->second = Result;
}
/// Check whether two Values can be considered equivalent.
@@ -1863,7 +1906,7 @@ bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V,
// the Values cannot come from different iterations of a potential cycle the
// phi nodes could be involved in.
for (auto *P : VisitedPhiBBs)
- if (isPotentiallyReachable(&P->front(), Inst, DT, LI))
+ if (isPotentiallyReachable(&P->front(), Inst, nullptr, DT, LI))
return false;
return true;
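
Besides threading AAQueryInfo, BasicAliasAnalysis.cpp moves the alias cache off the result object and into AAQueryInfo (note the deleted shrink_and_clear: the cache now simply dies with the top-level query), and isNonEscapingLocalObject gains a capture-check memo. The memo pre-inserts a conservative false before calling PointerMayBeCaptured, which both reserves the slot in a single probe and gives any re-entrant lookup of the same value a safe answer; the entry is overwritten once the real result is known. A sketch with stand-in types, not the LLVM ones:

  #include <map>

  static bool mayBeCaptured(const void *V) { return V == nullptr; } // stand-in

  bool cachedNonEscaping(const void *V, std::map<const void *, bool> &Cache) {
    auto [It, Inserted] = Cache.insert({V, false}); // reserve, conservatively false
    if (!Inserted)
      return It->second; // cached (or in-progress) answer
    It->second = !mayBeCaptured(V); // compute once, then memoize
    return It->second;
  }
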
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index ef27c36517ea..de183bbde173 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -1,9 +1,8 @@
//===- BlockFrequencyInfo.cpp - Block Frequency Analysis ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -204,11 +203,12 @@ BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const {
}
Optional<uint64_t>
-BlockFrequencyInfo::getBlockProfileCount(const BasicBlock *BB) const {
+BlockFrequencyInfo::getBlockProfileCount(const BasicBlock *BB,
+ bool AllowSynthetic) const {
if (!BFI)
return None;
- return BFI->getBlockProfileCount(*getFunction(), BB);
+ return BFI->getBlockProfileCount(*getFunction(), BB, AllowSynthetic);
}
Optional<uint64_t>
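Aside: a hedged usage sketch for the new AllowSynthetic parameter (the wrapper and its name are hypothetical): with AllowSynthetic set, counts derived from a synthetic, inferred entry count are returned instead of None.

#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
using namespace llvm;

// Returns a block's profile count, accepting synthetic entry counts,
// or zero when neither real nor synthetic profile data exists.
uint64_t getCountOrZero(const BlockFrequencyInfo &BFI, const BasicBlock *BB) {
  if (Optional<uint64_t> Count =
          BFI.getBlockProfileCount(BB, /*AllowSynthetic=*/true))
    return *Count;
  return 0;
}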
diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 08ebcc47a807..0db6dd04a7e8 100644
--- a/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -1,9 +1,8 @@
//===- BlockFrequencyInfoImpl.cpp - Block Frequency Info Implementation ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -558,14 +557,17 @@ BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
Optional<uint64_t>
BlockFrequencyInfoImplBase::getBlockProfileCount(const Function &F,
- const BlockNode &Node) const {
- return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency());
+ const BlockNode &Node,
+ bool AllowSynthetic) const {
+ return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency(),
+ AllowSynthetic);
}
Optional<uint64_t>
BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
- uint64_t Freq) const {
- auto EntryCount = F.getEntryCount();
+ uint64_t Freq,
+ bool AllowSynthetic) const {
+ auto EntryCount = F.getEntryCount(AllowSynthetic);
if (!EntryCount)
return None;
// Use 128 bit APInt to do the arithmetic to avoid overflow.
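Aside: the computation the comment refers to has this assumed shape (a simplified sketch, not the actual implementation): the block count is EntryCount * BlockFreq / EntryFreq, with the multiply done at 128-bit width because the 64-bit product can overflow.

#include "llvm/ADT/APInt.h"
using namespace llvm;

uint64_t scaleProfileCount(uint64_t EntryCount, uint64_t BlockFreq,
                           uint64_t EntryFreq) {
  // Widen to 128 bits so EntryCount * BlockFreq cannot wrap.
  APInt Count(128, EntryCount);
  Count *= APInt(128, BlockFreq);
  Count = Count.udiv(APInt(128, EntryFreq));
  return Count.getLimitedValue();
}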
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 7f544b27fe9d..5eb95003f5d8 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -1,9 +1,8 @@
//===- BranchProbabilityInfo.cpp - Branch Probability Analysis ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -661,8 +660,14 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB,
if (!CI)
return false;
+ auto GetConstantInt = [](Value *V) {
+ if (auto *I = dyn_cast<BitCastInst>(V))
+ return dyn_cast<ConstantInt>(I->getOperand(0));
+ return dyn_cast<ConstantInt>(V);
+ };
+
Value *RHS = CI->getOperand(1);
- ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
+ ConstantInt *CV = GetConstantInt(RHS);
if (!CV)
return false;
diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp
index aa880a62b754..18b83d6838cc 100644
--- a/lib/Analysis/CFG.cpp
+++ b/lib/Analysis/CFG.cpp
@@ -1,9 +1,8 @@
//===-- CFG.cpp - BasicBlock analysis --------------------------------------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CFG.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
@@ -120,22 +120,33 @@ static const Loop *getOutermostLoop(const LoopInfo *LI, const BasicBlock *BB) {
return L;
}
-// True if there is a loop which contains both BB1 and BB2.
-static bool loopContainsBoth(const LoopInfo *LI,
- const BasicBlock *BB1, const BasicBlock *BB2) {
- const Loop *L1 = getOutermostLoop(LI, BB1);
- const Loop *L2 = getOutermostLoop(LI, BB2);
- return L1 != nullptr && L1 == L2;
-}
-
bool llvm::isPotentiallyReachableFromMany(
SmallVectorImpl<BasicBlock *> &Worklist, BasicBlock *StopBB,
- const DominatorTree *DT, const LoopInfo *LI) {
+ const SmallPtrSetImpl<BasicBlock *> *ExclusionSet, const DominatorTree *DT,
+ const LoopInfo *LI) {
// When the stop block is unreachable, it's dominated from everywhere,
// regardless of whether there's a path between the two blocks.
if (DT && !DT->isReachableFromEntry(StopBB))
DT = nullptr;
+ // We can't skip directly from a block that dominates the stop block if the
+ // exclusion block is potentially in between.
+ if (ExclusionSet && !ExclusionSet->empty())
+ DT = nullptr;
+
+ // Normally any block in a loop is reachable from any other block in a loop,
+ // however excluded blocks might partition the body of a loop to make that
+ // untrue.
+ SmallPtrSet<const Loop *, 8> LoopsWithHoles;
+ if (LI && ExclusionSet) {
+ for (auto BB : *ExclusionSet) {
+ if (const Loop *L = getOutermostLoop(LI, BB))
+ LoopsWithHoles.insert(L);
+ }
+ }
+
+ const Loop *StopLoop = LI ? getOutermostLoop(LI, StopBB) : nullptr;
+
// Limit the number of blocks we visit. The goal is to avoid run-away compile
// times on large CFGs without hampering sensible code. Arbitrarily chosen.
unsigned Limit = 32;
@@ -146,10 +157,23 @@ bool llvm::isPotentiallyReachableFromMany(
continue;
if (BB == StopBB)
return true;
+ if (ExclusionSet && ExclusionSet->count(BB))
+ continue;
if (DT && DT->dominates(BB, StopBB))
return true;
- if (LI && loopContainsBoth(LI, BB, StopBB))
- return true;
+
+ const Loop *Outer = nullptr;
+ if (LI) {
+ Outer = getOutermostLoop(LI, BB);
+ // If we're in a loop with a hole, not all blocks in the loop are
+ // reachable from all other blocks. That implies we can't simply jump to
+ // the loop's exit blocks, as that exit might need to pass through an
+ // excluded block. Clear Outer so we process BB's successors.
+ if (LoopsWithHoles.count(Outer))
+ Outer = nullptr;
+ if (StopLoop && Outer == StopLoop)
+ return true;
+ }
if (!--Limit) {
// We haven't been able to prove it one way or the other. Conservatively
@@ -157,7 +181,7 @@ bool llvm::isPotentiallyReachableFromMany(
return true;
}
- if (const Loop *Outer = LI ? getOutermostLoop(LI, BB) : nullptr) {
+ if (Outer) {
// All blocks in a single loop are reachable from all other blocks. From
// any of these blocks, we can skip directly to the exits of the loop,
// ignoring any other blocks inside the loop body.
@@ -181,11 +205,13 @@ bool llvm::isPotentiallyReachable(const BasicBlock *A, const BasicBlock *B,
Worklist.push_back(const_cast<BasicBlock*>(A));
return isPotentiallyReachableFromMany(Worklist, const_cast<BasicBlock *>(B),
- DT, LI);
+ nullptr, DT, LI);
}
-bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
- const DominatorTree *DT, const LoopInfo *LI) {
+bool llvm::isPotentiallyReachable(
+ const Instruction *A, const Instruction *B,
+ const SmallPtrSetImpl<BasicBlock *> *ExclusionSet, const DominatorTree *DT,
+ const LoopInfo *LI) {
assert(A->getParent()->getParent() == B->getParent()->getParent() &&
"This analysis is function-local!");
@@ -227,11 +253,20 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
Worklist.push_back(const_cast<BasicBlock*>(A->getParent()));
}
- if (A->getParent() == &A->getParent()->getParent()->getEntryBlock())
- return true;
- if (B->getParent() == &A->getParent()->getParent()->getEntryBlock())
- return false;
+ if (DT) {
+ if (DT->isReachableFromEntry(A->getParent()) &&
+ !DT->isReachableFromEntry(B->getParent()))
+ return false;
+ if (!ExclusionSet || ExclusionSet->empty()) {
+ if (A->getParent() == &A->getParent()->getParent()->getEntryBlock() &&
+ DT->isReachableFromEntry(B->getParent()))
+ return true;
+ if (B->getParent() == &A->getParent()->getParent()->getEntryBlock() &&
+ DT->isReachableFromEntry(A->getParent()))
+ return false;
+ }
+ }
return isPotentiallyReachableFromMany(
- Worklist, const_cast<BasicBlock *>(B->getParent()), DT, LI);
+ Worklist, const_cast<BasicBlock *>(B->getParent()), ExclusionSet, DT, LI);
}
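Aside: a hedged sketch of calling the extended API (the wrapper name is hypothetical): the new exclusion-set parameter lets a client ask whether A can reach B without passing through any excluded block, and passing nullptr restores the old behaviour.

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

bool reachableAvoiding(const Instruction *A, const Instruction *B,
                       const SmallPtrSetImpl<BasicBlock *> &Excluded,
                       const DominatorTree *DT, const LoopInfo *LI) {
  // DT and LI are optional refinements; the exclusion set is honored
  // even when both are null.
  return isPotentiallyReachable(A, B, &Excluded, DT, LI);
}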
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index 6d01e9d5d447..619b675b58d8 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -1,9 +1,8 @@
//===- CFGPrinter.cpp - DOT printer for the control flow graph ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/CFLAndersAliasAnalysis.cpp b/lib/Analysis/CFLAndersAliasAnalysis.cpp
index 1c61dd369a05..690e514d4f5c 100644
--- a/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -1,9 +1,8 @@
//===- CFLAndersAliasAnalysis.cpp - Unification-based Alias Analysis ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -613,7 +612,7 @@ static void initializeWorkList(std::vector<WorkListItem> &WorkList,
for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) {
auto Src = InstantiatedValue{Val, I};
// If there's an assignment edge from X to Y, it means Y is reachable from
- // X at S2 and X is reachable from Y at S1
+ // X at S3 and X is reachable from Y at S1
for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) {
propagate(Edge.Other, Src, MatchState::FlowFromReadOnly, ReachSet,
WorkList);
@@ -876,7 +875,8 @@ AliasResult CFLAndersAAResult::query(const MemoryLocation &LocA,
}
AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+ const MemoryLocation &LocB,
+ AAQueryInfo &AAQI) {
if (LocA.Ptr == LocB.Ptr)
return MustAlias;
@@ -886,11 +886,11 @@ AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA,
// ConstantExpr, but every query needs to have at least one Value tied to a
// Function, and neither GlobalValues nor ConstantExprs are.
if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr))
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
AliasResult QueryResult = query(LocA, LocB);
if (QueryResult == MayAlias)
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
return QueryResult;
}
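Aside: the override above is an instance of a fallback chain: answer definitively when possible, otherwise defer to the next analysis, threading the shared per-query state through every hop so all layers use one AliasCache. A minimal non-LLVM sketch of that shape (all names here are illustrative):

#include <functional>

enum Result { No, May, Must };
struct QueryState { /* the shared per-query cache would live here */ };
using Query = std::function<Result(QueryState &)>;

Result queryWithFallback(const Query &Specific, const Query &Fallback,
                         QueryState &State) {
  Result R = Specific(State);
  // Only a definitive answer short-circuits; May defers to the fallback.
  return R == May ? Fallback(State) : R;
}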
diff --git a/lib/Analysis/CFLGraph.h b/lib/Analysis/CFLGraph.h
index 12121d717433..21842ed36487 100644
--- a/lib/Analysis/CFLGraph.h
+++ b/lib/Analysis/CFLGraph.h
@@ -1,9 +1,8 @@
//===- CFLGraph.h - Abstract stratified sets implementation. -----*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -25,7 +24,6 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -154,7 +152,7 @@ public:
}
};
-///A builder class used to create CFLGraph instance from a given function
+/// A builder class used to create a CFLGraph instance from a given function.
/// The CFL-AA that uses this builder must provide its own type as a template
/// argument. This is necessary for interprocedural processing: CFLGraphBuilder
/// needs a way of obtaining the summary of other functions when callinsts are
@@ -183,24 +181,23 @@ template <typename CFLAA> class CFLGraphBuilder {
static bool hasUsefulEdges(ConstantExpr *CE) {
// ConstantExpr doesn't have terminators, invokes, or fences, so only
- // needs
- // to check for compares.
+ // needs to check for compares.
return CE->getOpcode() != Instruction::ICmp &&
CE->getOpcode() != Instruction::FCmp;
}
// Returns possible functions called by CS into the given SmallVectorImpl.
// Returns true if targets found, false otherwise.
- static bool getPossibleTargets(CallSite CS,
+ static bool getPossibleTargets(CallBase &Call,
SmallVectorImpl<Function *> &Output) {
- if (auto *Fn = CS.getCalledFunction()) {
+ if (auto *Fn = Call.getCalledFunction()) {
Output.push_back(Fn);
return true;
}
// TODO: If the call is indirect, we might be able to enumerate all
- // potential
- // targets of the call and return them, rather than just failing.
+ // potential targets of the call and return them, rather than just
+ // failing.
return false;
}
@@ -294,6 +291,11 @@ template <typename CFLAA> class CFLGraphBuilder {
addAssignEdge(Op2, &Inst);
}
+ void visitUnaryOperator(UnaryOperator &Inst) {
+ auto *Src = Inst.getOperand(0);
+ addAssignEdge(Src, &Inst);
+ }
+
void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
auto *Ptr = Inst.getPointerOperand();
auto *Val = Inst.getNewValOperand();
@@ -370,11 +372,11 @@ template <typename CFLAA> class CFLGraphBuilder {
return !Fn->hasExactDefinition();
}
- bool tryInterproceduralAnalysis(CallSite CS,
+ bool tryInterproceduralAnalysis(CallBase &Call,
const SmallVectorImpl<Function *> &Fns) {
assert(Fns.size() > 0);
- if (CS.arg_size() > MaxSupportedArgsInSummary)
+ if (Call.arg_size() > MaxSupportedArgsInSummary)
return false;
// Exit early if we'll fail anyway
@@ -382,7 +384,7 @@ template <typename CFLAA> class CFLGraphBuilder {
if (isFunctionExternal(Fn) || Fn->isVarArg())
return false;
// Fail if the caller does not provide enough arguments
- assert(Fn->arg_size() <= CS.arg_size());
+ assert(Fn->arg_size() <= Call.arg_size());
if (!AA.getAliasSummary(*Fn))
return false;
}
@@ -393,7 +395,7 @@ template <typename CFLAA> class CFLGraphBuilder {
auto &RetParamRelations = Summary->RetParamRelations;
for (auto &Relation : RetParamRelations) {
- auto IRelation = instantiateExternalRelation(Relation, CS);
+ auto IRelation = instantiateExternalRelation(Relation, Call);
if (IRelation.hasValue()) {
Graph.addNode(IRelation->From);
Graph.addNode(IRelation->To);
@@ -403,7 +405,7 @@ template <typename CFLAA> class CFLGraphBuilder {
auto &RetParamAttributes = Summary->RetParamAttributes;
for (auto &Attribute : RetParamAttributes) {
- auto IAttr = instantiateExternalAttribute(Attribute, CS);
+ auto IAttr = instantiateExternalAttribute(Attribute, Call);
if (IAttr.hasValue())
Graph.addNode(IAttr->IValue, IAttr->Attr);
}
@@ -412,37 +414,35 @@ template <typename CFLAA> class CFLGraphBuilder {
return true;
}
- void visitCallSite(CallSite CS) {
- auto Inst = CS.getInstruction();
-
+ void visitCallBase(CallBase &Call) {
// Make sure all arguments and return value are added to the graph first
- for (Value *V : CS.args())
+ for (Value *V : Call.args())
if (V->getType()->isPointerTy())
addNode(V);
- if (Inst->getType()->isPointerTy())
- addNode(Inst);
+ if (Call.getType()->isPointerTy())
+ addNode(&Call);
// Check if the call is to a library function that
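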
// allocates/deallocates on the heap. Those kinds of functions do not
// introduce any aliases.
// TODO: address other common library functions such as realloc(),
// strdup(), etc.
- if (isMallocOrCallocLikeFn(Inst, &TLI) || isFreeCall(Inst, &TLI))
+ if (isMallocOrCallocLikeFn(&Call, &TLI) || isFreeCall(&Call, &TLI))
return;
// TODO: Add support for noalias args/all the other fun function
// attributes that we can tack on.
SmallVector<Function *, 4> Targets;
- if (getPossibleTargets(CS, Targets))
- if (tryInterproceduralAnalysis(CS, Targets))
+ if (getPossibleTargets(Call, Targets))
+ if (tryInterproceduralAnalysis(Call, Targets))
return;
// Because the function is opaque, we need to note that anything
// could have happened to the arguments (unless the function is marked
// readonly or readnone), and that the result could alias just about
// anything, too (unless the result is marked noalias).
- if (!CS.onlyReadsMemory())
- for (Value *V : CS.args()) {
+ if (!Call.onlyReadsMemory())
+ for (Value *V : Call.args()) {
if (V->getType()->isPointerTy()) {
// The argument itself escapes.
Graph.addAttr(InstantiatedValue{V, 0}, getAttrEscaped());
@@ -453,12 +453,12 @@ template <typename CFLAA> class CFLGraphBuilder {
}
}
- if (Inst->getType()->isPointerTy()) {
- auto *Fn = CS.getCalledFunction();
+ if (Call.getType()->isPointerTy()) {
+ auto *Fn = Call.getCalledFunction();
if (Fn == nullptr || !Fn->returnDoesNotAlias())
// No need to call addNode() since we've added the call at the
// beginning of this function and we know it is not a global.
- Graph.addAttr(InstantiatedValue{Inst, 0}, getAttrUnknown());
+ Graph.addAttr(InstantiatedValue{&Call, 0}, getAttrUnknown());
}
}
@@ -559,6 +559,7 @@ template <typename CFLAA> class CFLGraphBuilder {
}
case Instruction::Add:
+ case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
@@ -583,6 +584,11 @@ template <typename CFLAA> class CFLGraphBuilder {
break;
}
+ case Instruction::FNeg: {
+ addAssignEdge(CE->getOperand(0), CE);
+ break;
+ }
+
default:
llvm_unreachable("Unknown instruction type encountered!");
}
diff --git a/lib/Analysis/CFLSteensAliasAnalysis.cpp b/lib/Analysis/CFLSteensAliasAnalysis.cpp
index 30ce13578e54..44b1834f70bf 100644
--- a/lib/Analysis/CFLSteensAliasAnalysis.cpp
+++ b/lib/Analysis/CFLSteensAliasAnalysis.cpp
@@ -1,9 +1,8 @@
//===- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/CGSCCPassManager.cpp b/lib/Analysis/CGSCCPassManager.cpp
index fd2292ced017..a0b3f83cca6a 100644
--- a/lib/Analysis/CGSCCPassManager.cpp
+++ b/lib/Analysis/CGSCCPassManager.cpp
@@ -1,9 +1,8 @@
//===- CGSCCPassManager.cpp - Managing & running CGSCC passes -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -111,6 +110,12 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
// ...getContext().yield();
}
+ // Before we mark all of *this* SCC's analyses as preserved below, intersect
+ // this with the cross-SCC preserved analysis set. This is used to allow
+ // CGSCC passes to mutate ancestor SCCs and still trigger proper invalidation
+ // for them.
+ UR.CrossSCCPA.intersect(PA);
+
// Invalidation was handled after each pass in the above loop for the current
// SCC. Therefore, the remaining analysis results in the AnalysisManager are
// preserved. We mark this with a set so that we don't need to inspect each
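Aside: PreservedAnalyses::intersect keeps only the analyses preserved by both sets, so repeatedly intersecting per-pass results yields what is still valid across the whole sequence. A small sketch of that accumulation (the helper name is hypothetical):

#include "llvm/IR/PassManager.h"
using namespace llvm;

// Fold one pass's preserved set into a running cross-pass summary.
PreservedAnalyses accumulatePreserved(PreservedAnalyses Acc,
                                      const PreservedAnalyses &PerPass) {
  Acc.intersect(PerPass);
  return Acc;
}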
diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp
index 0da678e1611b..ec5e94d499be 100644
--- a/lib/Analysis/CallGraph.cpp
+++ b/lib/Analysis/CallGraph.cpp
@@ -1,9 +1,8 @@
//===- CallGraph.cpp - Build a Module's call graph ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -11,7 +10,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
@@ -64,25 +62,25 @@ void CallGraph::addToCallGraph(Function *F) {
// If this function has external linkage or has its address taken, anything
// could call it.
if (!F->hasLocalLinkage() || F->hasAddressTaken())
- ExternalCallingNode->addCalledFunction(CallSite(), Node);
+ ExternalCallingNode->addCalledFunction(nullptr, Node);
// If this function is not defined in this translation unit, it could call
// anything.
if (F->isDeclaration() && !F->isIntrinsic())
- Node->addCalledFunction(CallSite(), CallsExternalNode.get());
+ Node->addCalledFunction(nullptr, CallsExternalNode.get());
// Look for calls by this function.
for (BasicBlock &BB : *F)
for (Instruction &I : BB) {
- if (auto CS = CallSite(&I)) {
- const Function *Callee = CS.getCalledFunction();
+ if (auto *Call = dyn_cast<CallBase>(&I)) {
+ const Function *Callee = Call->getCalledFunction();
if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID()))
// Indirect calls of intrinsics are not allowed so no need to check.
// We can be more precise here by using TargetArg returned by
// Intrinsic::isLeaf.
- Node->addCalledFunction(CS, CallsExternalNode.get());
+ Node->addCalledFunction(Call, CallsExternalNode.get());
else if (!Callee->isIntrinsic())
- Node->addCalledFunction(CS, getOrInsertFunction(Callee));
+ Node->addCalledFunction(Call, getOrInsertFunction(Callee));
}
}
}
@@ -185,10 +183,10 @@ LLVM_DUMP_METHOD void CallGraphNode::dump() const { print(dbgs()); }
/// removeCallEdgeFor - This method removes the edge in the node for the
/// specified call site. Note that this method takes linear time, so it
/// should be used sparingly.
-void CallGraphNode::removeCallEdgeFor(CallSite CS) {
+void CallGraphNode::removeCallEdgeFor(CallBase &Call) {
for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
- if (I->first == CS.getInstruction()) {
+ if (I->first == &Call) {
I->second->DropRef();
*I = CalledFunctions.back();
CalledFunctions.pop_back();
@@ -228,13 +226,13 @@ void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) {
/// replaceCallEdge - This method replaces the edge in the node for the
/// specified call site with a new one. Note that this method takes linear
/// time, so it should be used sparingly.
-void CallGraphNode::replaceCallEdge(CallSite CS,
- CallSite NewCS, CallGraphNode *NewNode){
+void CallGraphNode::replaceCallEdge(CallBase &Call, CallBase &NewCall,
+ CallGraphNode *NewNode) {
for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
- if (I->first == CS.getInstruction()) {
+ if (I->first == &Call) {
I->second->DropRef();
- I->first = NewCS.getInstruction();
+ I->first = &NewCall;
I->second = NewNode;
NewNode->AddRef();
return;
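Aside: the CallSite-to-CallBase migration in this file follows one pattern throughout: a single dyn_cast<CallBase> now matches both call and invoke instructions. A self-contained sketch of the scan (the function name is hypothetical):

#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

unsigned countDirectCalls(Function &F) {
  unsigned N = 0;
  for (BasicBlock &BB : F)
    for (Instruction &I : BB)
      if (auto *Call = dyn_cast<CallBase>(&I))
        // getCalledFunction() is non-null only for direct calls.
        if (Call->getCalledFunction())
          ++N;
  return N;
}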
diff --git a/lib/Analysis/CallGraphSCCPass.cpp b/lib/Analysis/CallGraphSCCPass.cpp
index 0aed57a39387..196ef400bc4e 100644
--- a/lib/Analysis/CallGraphSCCPass.cpp
+++ b/lib/Analysis/CallGraphSCCPass.cpp
@@ -1,9 +1,8 @@
//===- CallGraphSCCPass.cpp - Pass that operates BU on call graph ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,7 +19,6 @@
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/Intrinsics.h"
@@ -202,7 +200,7 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
/// This never happens in checking mode.
bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
bool CheckingMode) {
- DenseMap<Value*, CallGraphNode*> CallSites;
+ DenseMap<Value *, CallGraphNode *> Calls;
LLVM_DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size()
<< " nodes:\n";
@@ -231,21 +229,21 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) {
// If this call site is null, then the function pass deleted the call
// entirely and the WeakTrackingVH nulled it out.
+ auto *Call = dyn_cast_or_null<CallBase>(I->first);
if (!I->first ||
// If we've already seen this call site, then the FunctionPass RAUW'd
// one call with another, which resulted in two "uses" in the edge
// list of the same call.
- CallSites.count(I->first) ||
+ Calls.count(I->first) ||
// If the call edge is not from a call or invoke, or it is an
// intrinsic call, then the function pass RAUW'd a call with
// another value. This can happen when well-known functions are
// constant folded, etc.
- !CallSite(I->first) ||
- (CallSite(I->first).getCalledFunction() &&
- CallSite(I->first).getCalledFunction()->isIntrinsic() &&
- Intrinsic::isLeaf(
- CallSite(I->first).getCalledFunction()->getIntrinsicID()))) {
+ !Call ||
+ (Call->getCalledFunction() &&
+ Call->getCalledFunction()->isIntrinsic() &&
+ Intrinsic::isLeaf(Call->getCalledFunction()->getIntrinsicID()))) {
assert(!CheckingMode &&
"CallGraphSCCPass did not update the CallGraph correctly!");
@@ -269,15 +267,14 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
continue;
}
- assert(!CallSites.count(I->first) &&
+ assert(!Calls.count(I->first) &&
"Call site occurs in node multiple times");
- CallSite CS(I->first);
- if (CS) {
- Function *Callee = CS.getCalledFunction();
+ if (Call) {
+ Function *Callee = Call->getCalledFunction();
// Ignore intrinsics because they're not really function calls.
if (!Callee || !(Callee->isIntrinsic()))
- CallSites.insert(std::make_pair(I->first, I->second));
+ Calls.insert(std::make_pair(I->first, I->second));
}
++I;
}
@@ -288,23 +285,25 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
for (BasicBlock &BB : *F)
for (Instruction &I : BB) {
- CallSite CS(&I);
- if (!CS) continue;
- Function *Callee = CS.getCalledFunction();
- if (Callee && Callee->isIntrinsic()) continue;
+ auto *Call = dyn_cast<CallBase>(&I);
+ if (!Call)
+ continue;
+ Function *Callee = Call->getCalledFunction();
+ if (Callee && Callee->isIntrinsic())
+ continue;
// If this call site already existed in the callgraph, just verify it
- // matches up to expectations and remove it from CallSites.
- DenseMap<Value*, CallGraphNode*>::iterator ExistingIt =
- CallSites.find(CS.getInstruction());
- if (ExistingIt != CallSites.end()) {
+ // matches up to expectations and remove it from Calls.
+ DenseMap<Value *, CallGraphNode *>::iterator ExistingIt =
+ Calls.find(Call);
+ if (ExistingIt != Calls.end()) {
CallGraphNode *ExistingNode = ExistingIt->second;
- // Remove from CallSites since we have now seen it.
- CallSites.erase(ExistingIt);
+ // Remove from Calls since we have now seen it.
+ Calls.erase(ExistingIt);
// Verify that the callee is right.
- if (ExistingNode->getFunction() == CS.getCalledFunction())
+ if (ExistingNode->getFunction() == Call->getCalledFunction())
continue;
// If we are in checking mode, we are not allowed to actually mutate
@@ -312,7 +311,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
// callgraph is less precise than it could be (e.g. an indirect call
// site could be turned direct), don't reject it in checking mode, and
// don't tweak it to be more precise.
- if (CheckingMode && CS.getCalledFunction() &&
+ if (CheckingMode && Call->getCalledFunction() &&
ExistingNode->getFunction() == nullptr)
continue;
@@ -322,7 +321,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
// If not, we either went from a direct call to indirect, indirect to
// direct, or direct to different direct.
CallGraphNode *CalleeNode;
- if (Function *Callee = CS.getCalledFunction()) {
+ if (Function *Callee = Call->getCalledFunction()) {
CalleeNode = CG.getOrInsertFunction(Callee);
// Keep track of whether we turned an indirect call into a direct
// one.
@@ -336,7 +335,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
}
// Update the edge target in CGN.
- CGN->replaceCallEdge(CS, CS, CalleeNode);
+ CGN->replaceCallEdge(*Call, *Call, CalleeNode);
MadeChange = true;
continue;
}
@@ -346,7 +345,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
// If the call site didn't exist in the CGN yet, add it.
CallGraphNode *CalleeNode;
- if (Function *Callee = CS.getCalledFunction()) {
+ if (Function *Callee = Call->getCalledFunction()) {
CalleeNode = CG.getOrInsertFunction(Callee);
++NumDirectAdded;
} else {
@@ -354,7 +353,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
++NumIndirectAdded;
}
- CGN->addCalledFunction(CS, CalleeNode);
+ CGN->addCalledFunction(Call, CalleeNode);
MadeChange = true;
}
@@ -376,12 +375,12 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
// they are dangling pointers. WeakTrackingVH should save us from this, so
// abort if this happens.
- assert(CallSites.empty() && "Dangling pointers found in call sites map");
+ assert(Calls.empty() && "Dangling pointers found in call sites map");
// Periodically do an explicit clear to remove tombstones when processing
// large scc's.
if ((FunctionNo & 15) == 15)
- CallSites.clear();
+ Calls.clear();
}
LLVM_DEBUG(if (MadeChange) {
@@ -682,11 +681,28 @@ Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &OS,
return new PrintCallGraphPass(Banner, OS);
}
+static std::string getDescription(const CallGraphSCC &SCC) {
+ std::string Desc = "SCC (";
+ bool First = true;
+ for (CallGraphNode *CGN : SCC) {
+ if (First)
+ First = false;
+ else
+ Desc += ", ";
+ Function *F = CGN->getFunction();
+ if (F)
+ Desc += F->getName();
+ else
+ Desc += "<<null function>>";
+ }
+ Desc += ")";
+ return Desc;
+}
+
bool CallGraphSCCPass::skipSCC(CallGraphSCC &SCC) const {
- return !SCC.getCallGraph().getModule()
- .getContext()
- .getOptPassGate()
- .shouldRunPass(this, SCC);
+ OptPassGate &Gate =
+ SCC.getCallGraph().getModule().getContext().getOptPassGate();
+ return Gate.isEnabled() && !Gate.shouldRunPass(this, getDescription(SCC));
}
char DummyCGSCCPass::ID = 0;
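Aside: skipSCC now consults the gate only when it is enabled (e.g. under -opt-bisect-limit) and hands it a printable description rather than the IR unit itself. A sketch of the same check for an arbitrary unit (the helper name and Desc argument are illustrative):

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OptBisect.h"
#include <string>
using namespace llvm;

bool shouldSkip(const Pass *P, const Module &M, const std::string &Desc) {
  OptPassGate &Gate = M.getContext().getOptPassGate();
  // When no gate is installed or enabled, never skip.
  return Gate.isEnabled() && !Gate.shouldRunPass(P, Desc);
}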
diff --git a/lib/Analysis/CallPrinter.cpp b/lib/Analysis/CallPrinter.cpp
index e7017e77652a..d24cbd104bf6 100644
--- a/lib/Analysis/CallPrinter.cpp
+++ b/lib/Analysis/CallPrinter.cpp
@@ -1,9 +1,8 @@
//===- CallPrinter.cpp - DOT printer for call graph -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index 669f4f2835fa..adaa83a6c443 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -1,9 +1,8 @@
//===--- CaptureTracking.cpp - Determine whether a pointer is captured ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -102,14 +101,14 @@ namespace {
SmallVector<BasicBlock*, 32> Worklist;
Worklist.append(succ_begin(BB), succ_end(BB));
- return !isPotentiallyReachableFromMany(Worklist, BB, DT);
+ return !isPotentiallyReachableFromMany(Worklist, BB, nullptr, DT);
}
// If the value is defined in the same basic block as use and BeforeHere,
// there is no need to explore the use if BeforeHere dominates use.
// Check whether there is a path from I to BeforeHere.
if (BeforeHere != I && DT->dominates(BeforeHere, I) &&
- !isPotentiallyReachable(I, BeforeHere, DT))
+ !isPotentiallyReachable(I, BeforeHere, nullptr, DT))
return true;
return false;
@@ -331,14 +330,32 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
AddUses(I);
break;
case Instruction::ICmp: {
- // Don't count comparisons of a no-alias return value against null as
- // captures. This allows us to ignore comparisons of malloc results
- // with null, for example.
- if (ConstantPointerNull *CPN =
- dyn_cast<ConstantPointerNull>(I->getOperand(1)))
+ if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(1))) {
+ // Don't count comparisons of a no-alias return value against null as
+ // captures. This allows us to ignore comparisons of malloc results
+ // with null, for example.
if (CPN->getType()->getAddressSpace() == 0)
if (isNoAliasCall(V->stripPointerCasts()))
break;
+ if (!I->getFunction()->nullPointerIsDefined()) {
+ auto *O = I->getOperand(0)->stripPointerCastsSameRepresentation();
+ // An inbounds GEP can either be a valid pointer (pointing into
+ // or to the end of an allocation), or be null in the default
+ // address space. So for an inbounds GEPs there is no way to let
+ // the pointer escape using clever GEP hacking because doing so
+ // would make the pointer point outside of the allocated object
+ // and thus make the GEP result a poison value.
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(O))
+ if (GEP->isInBounds())
+ break;
+ // Comparing a dereferenceable_or_null argument against null
+ // cannot lead to pointer escapes, because if it is not null it
+ // must be a valid (in-bounds) pointer.
+ bool CanBeNull;
+ if (O->getPointerDereferenceableBytes(
+         I->getModule()->getDataLayout(), CanBeNull))
+ break;
+ }
+ }
// Comparison against value stored in global variable. Given the pointer
// does not escape, its value cannot be guessed and stored separately in a
// global variable.
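Aside: a distilled sketch of the new null-comparison reasoning (it assumes the caller already established that operand 1 is null in address space 0; the helper name is hypothetical):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
using namespace llvm;

bool nullCompareCannotCapture(ICmpInst &I) {
  if (I.getFunction()->nullPointerIsDefined())
    return false; // Null is a usable address here; the test may leak bits.
  auto *O = I.getOperand(0)->stripPointerCastsSameRepresentation();
  // An inbounds GEP is either inside (or one past) its object or poison,
  // so comparing it with null reveals nothing about the pointer's value.
  if (auto *GEP = dyn_cast<GetElementPtrInst>(O))
    if (GEP->isInBounds())
      return true;
  // A dereferenceable(_or_null) pointer is valid whenever it is non-null.
  bool CanBeNull;
  return O->getPointerDereferenceableBytes(I.getModule()->getDataLayout(),
                                           CanBeNull) != 0;
}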
diff --git a/lib/Analysis/CmpInstAnalysis.cpp b/lib/Analysis/CmpInstAnalysis.cpp
index 27071babec5c..a5757be2c4f4 100644
--- a/lib/Analysis/CmpInstAnalysis.cpp
+++ b/lib/Analysis/CmpInstAnalysis.cpp
@@ -1,9 +1,8 @@
//===- CmpInstAnalysis.cpp - Utils to help fold compares ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index 46cc87d2b178..627d955c865f 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -1,9 +1,8 @@
//===- CodeMetrics.cpp - Code cost measurements ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,7 +15,6 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
@@ -126,14 +124,12 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
continue;
// Special handling for calls.
- if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
- ImmutableCallSite CS(&I);
-
- if (const Function *F = CS.getCalledFunction()) {
+ if (const auto *Call = dyn_cast<CallBase>(&I)) {
+ if (const Function *F = Call->getCalledFunction()) {
// If a function is both internal and has a single use, then it is
// extremely likely to get inlined in the future (it was probably
// exposed by an interleaved devirtualization pass).
- if (!CS.isNoInline() && F->hasInternalLinkage() && F->hasOneUse())
+ if (!Call->isNoInline() && F->hasInternalLinkage() && F->hasOneUse())
++NumInlineCandidates;
// If this call is to function itself, then the function is recursive.
@@ -148,7 +144,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
} else {
// We don't want inline asm to count as a call - that would prevent loop
// unrolling. The argument setup cost is still real, though.
- if (!isa<InlineAsm>(CS.getCalledValue()))
+ if (!Call->isInlineAsm())
++NumCalls;
}
}
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 5da29d6d2372..20231ca78b45 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -1,9 +1,8 @@
//===-- ConstantFolding.cpp - Fold instructions into constants ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -26,6 +25,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Config/config.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -516,7 +516,7 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
MapTy = Type::getInt64Ty(C->getContext());
else if (LoadTy->isVectorTy()) {
MapTy = PointerType::getIntNTy(C->getContext(),
- DL.getTypeAllocSizeInBits(LoadTy));
+ DL.getTypeSizeInBits(LoadTy));
} else
return nullptr;
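Aside: the switch from getTypeAllocSizeInBits to getTypeSizeInBits matters because the integer map type must match the value's width exactly, while the alloc size is padded up for alignment. A small sketch of the distinction (the example type is chosen purely for illustration):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

void sizeDemo(LLVMContext &Ctx, const DataLayout &DL) {
  Type *V6i8 = VectorType::get(Type::getInt8Ty(Ctx), 6);
  uint64_t ValueBits = DL.getTypeSizeInBits(V6i8);      // 48: the value width
  uint64_t AllocBits = DL.getTypeAllocSizeInBits(V6i8); // >= 48, padded
  (void)ValueBits;
  (void)AllocBits;
}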
@@ -1000,7 +1000,9 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
const TargetLibraryInfo *TLI) {
Type *DestTy = InstOrCE->getType();
- // Handle easy binops first.
+ if (Instruction::isUnaryOp(Opcode))
+ return ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL);
+
if (Instruction::isBinaryOp(Opcode))
return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL);
@@ -1025,15 +1027,18 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
case Instruction::FCmp: llvm_unreachable("Invalid for compares");
case Instruction::Call:
if (auto *F = dyn_cast<Function>(Ops.back())) {
- ImmutableCallSite CS(cast<CallInst>(InstOrCE));
- if (canConstantFoldCallTo(CS, F))
- return ConstantFoldCall(CS, F, Ops.slice(0, Ops.size() - 1), TLI);
+ const auto *Call = cast<CallBase>(InstOrCE);
+ if (canConstantFoldCallTo(Call, F))
+ return ConstantFoldCall(Call, F, Ops.slice(0, Ops.size() - 1), TLI);
}
return nullptr;
case Instruction::Select:
return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
case Instruction::ExtractElement:
return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
+ case Instruction::ExtractValue:
+ return ConstantExpr::getExtractValue(
+ Ops[0], cast<ExtractValueInst>(InstOrCE)->getIndices());
case Instruction::InsertElement:
return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
case Instruction::ShuffleVector:
@@ -1263,6 +1268,13 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
}
+Constant *llvm::ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,
+ const DataLayout &DL) {
+ assert(Instruction::isUnaryOp(Opcode));
+
+ return ConstantExpr::get(Opcode, Op);
+}
+
Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
Constant *RHS,
const DataLayout &DL) {
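Aside: a hedged usage sketch for the new entry point (the wrapper name is hypothetical): FNeg is the only IR unary operator at this point, so the fold simply defers to ConstantExpr.

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

Constant *foldFNeg(Constant *Op, const DataLayout &DL) {
  return ConstantFoldUnaryOpOperand(Instruction::FNeg, Op, DL);
}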
@@ -1367,8 +1379,8 @@ llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
// Constant Folding for Calls
//
-bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
- if (CS.isNoBuiltin() || CS.isStrictFP())
+bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
+ if (Call->isNoBuiltin() || Call->isStrictFP())
return false;
switch (F->getIntrinsicID()) {
case Intrinsic::fabs:
@@ -1414,6 +1426,8 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
case Intrinsic::uadd_sat:
case Intrinsic::ssub_sat:
case Intrinsic::usub_sat:
+ case Intrinsic::smul_fix:
+ case Intrinsic::smul_fix_sat:
case Intrinsic::convert_from_fp16:
case Intrinsic::convert_to_fp16:
case Intrinsic::bitreverse:
@@ -1518,14 +1532,12 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
namespace {
Constant *GetConstantFoldFPValue(double V, Type *Ty) {
- if (Ty->isHalfTy()) {
+ if (Ty->isHalfTy() || Ty->isFloatTy()) {
APFloat APF(V);
bool unused;
- APF.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &unused);
+ APF.convert(Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &unused);
return ConstantFP::get(Ty->getContext(), APF);
}
- if (Ty->isFloatTy())
- return ConstantFP::get(Ty->getContext(), APFloat((float)V));
if (Ty->isDoubleTy())
return ConstantFP::get(Ty->getContext(), APFloat(V));
llvm_unreachable("Can only constant fold half/float/double");
@@ -1641,522 +1653,538 @@ static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
return false;
}
-Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
- ArrayRef<Constant *> Operands,
- const TargetLibraryInfo *TLI,
- ImmutableCallSite CS) {
- if (Operands.size() == 1) {
- if (IntrinsicID == Intrinsic::is_constant) {
- // We know we have a "Constant" argument. But we want to only
- // return true for manifest constants, not those that depend on
- // constants with unknowable values, e.g. GlobalValue or BlockAddress.
- if (isManifestConstant(Operands[0]))
- return ConstantInt::getTrue(Ty->getContext());
- return nullptr;
- }
- if (isa<UndefValue>(Operands[0])) {
- // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN.
- // ctpop() is between 0 and bitwidth, pick 0 for undef.
- if (IntrinsicID == Intrinsic::cos ||
- IntrinsicID == Intrinsic::ctpop)
- return Constant::getNullValue(Ty);
- if (IntrinsicID == Intrinsic::bswap ||
- IntrinsicID == Intrinsic::bitreverse ||
- IntrinsicID == Intrinsic::launder_invariant_group ||
- IntrinsicID == Intrinsic::strip_invariant_group)
- return Operands[0];
- }
+static Constant *ConstantFoldScalarCall1(StringRef Name,
+ Intrinsic::ID IntrinsicID,
+ Type *Ty,
+ ArrayRef<Constant *> Operands,
+ const TargetLibraryInfo *TLI,
+ const CallBase *Call) {
+ assert(Operands.size() == 1 && "Wrong number of operands.");
+
+ if (IntrinsicID == Intrinsic::is_constant) {
+ // We know we have a "Constant" argument. But we want to only
+ // return true for manifest constants, not those that depend on
+ // constants with unknowable values, e.g. GlobalValue or BlockAddress.
+ if (isManifestConstant(Operands[0]))
+ return ConstantInt::getTrue(Ty->getContext());
+ return nullptr;
+ }
+ if (isa<UndefValue>(Operands[0])) {
+ // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN.
+ // ctpop() is between 0 and bitwidth, pick 0 for undef.
+ if (IntrinsicID == Intrinsic::cos ||
+ IntrinsicID == Intrinsic::ctpop)
+ return Constant::getNullValue(Ty);
+ if (IntrinsicID == Intrinsic::bswap ||
+ IntrinsicID == Intrinsic::bitreverse ||
+ IntrinsicID == Intrinsic::launder_invariant_group ||
+ IntrinsicID == Intrinsic::strip_invariant_group)
+ return Operands[0];
+ }
- if (isa<ConstantPointerNull>(Operands[0])) {
- // launder(null) == null == strip(null) iff in addrspace 0
- if (IntrinsicID == Intrinsic::launder_invariant_group ||
- IntrinsicID == Intrinsic::strip_invariant_group) {
- // If instruction is not yet put in a basic block (e.g. when cloning
- // a function during inlining), CS caller may not be available.
- // So check CS's BB first before querying CS.getCaller.
- const Function *Caller = CS.getParent() ? CS.getCaller() : nullptr;
- if (Caller &&
- !NullPointerIsDefined(
- Caller, Operands[0]->getType()->getPointerAddressSpace())) {
- return Operands[0];
- }
- return nullptr;
+ if (isa<ConstantPointerNull>(Operands[0])) {
+ // launder(null) == null == strip(null) iff in addrspace 0
+ if (IntrinsicID == Intrinsic::launder_invariant_group ||
+ IntrinsicID == Intrinsic::strip_invariant_group) {
+ // If instruction is not yet put in a basic block (e.g. when cloning
+ // a function during inlining), Call's caller may not be available.
+ // So check Call's BB first before querying Call->getCaller.
+ const Function *Caller =
+ Call->getParent() ? Call->getCaller() : nullptr;
+ if (Caller &&
+ !NullPointerIsDefined(
+ Caller, Operands[0]->getType()->getPointerAddressSpace())) {
+ return Operands[0];
}
+ return nullptr;
}
+ }
- if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) {
- if (IntrinsicID == Intrinsic::convert_to_fp16) {
- APFloat Val(Op->getValueAPF());
-
- bool lost = false;
- Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost);
+ if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) {
+ if (IntrinsicID == Intrinsic::convert_to_fp16) {
+ APFloat Val(Op->getValueAPF());
- return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt());
- }
+ bool lost = false;
+ Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost);
- if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
- return nullptr;
+ return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt());
+ }
- if (IntrinsicID == Intrinsic::round) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmNearestTiesToAway);
- return ConstantFP::get(Ty->getContext(), V);
- }
+ if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
+ return nullptr;
- if (IntrinsicID == Intrinsic::floor) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmTowardNegative);
- return ConstantFP::get(Ty->getContext(), V);
- }
+ if (IntrinsicID == Intrinsic::round) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmNearestTiesToAway);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
- if (IntrinsicID == Intrinsic::ceil) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmTowardPositive);
- return ConstantFP::get(Ty->getContext(), V);
- }
+ if (IntrinsicID == Intrinsic::floor) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmTowardNegative);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
- if (IntrinsicID == Intrinsic::trunc) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmTowardZero);
- return ConstantFP::get(Ty->getContext(), V);
- }
+ if (IntrinsicID == Intrinsic::ceil) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmTowardPositive);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
- if (IntrinsicID == Intrinsic::rint) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmNearestTiesToEven);
- return ConstantFP::get(Ty->getContext(), V);
- }
+ if (IntrinsicID == Intrinsic::trunc) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmTowardZero);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
- if (IntrinsicID == Intrinsic::nearbyint) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmNearestTiesToEven);
- return ConstantFP::get(Ty->getContext(), V);
- }
+ if (IntrinsicID == Intrinsic::rint) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
- /// We only fold functions with finite arguments. Folding NaN and inf is
- /// likely to be aborted with an exception anyway, and some host libms
- /// have known errors raising exceptions.
- if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity())
- return nullptr;
+ if (IntrinsicID == Intrinsic::nearbyint) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
- /// Currently APFloat versions of these functions do not exist, so we use
- /// the host native double versions. Float versions are not called
- /// directly but for all these it is true (float)(f((double)arg)) ==
- /// f(arg). Long double not supported yet.
- double V = getValueAsDouble(Op);
+ /// We only fold functions with finite arguments. Folding NaN and inf is
+ /// likely to be aborted with an exception anyway, and some host libms
+ /// have known errors raising exceptions.
+ if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity())
+ return nullptr;
- switch (IntrinsicID) {
- default: break;
- case Intrinsic::fabs:
- return ConstantFoldFP(fabs, V, Ty);
- case Intrinsic::log2:
- return ConstantFoldFP(Log2, V, Ty);
- case Intrinsic::log:
- return ConstantFoldFP(log, V, Ty);
- case Intrinsic::log10:
- return ConstantFoldFP(log10, V, Ty);
- case Intrinsic::exp:
- return ConstantFoldFP(exp, V, Ty);
- case Intrinsic::exp2:
- return ConstantFoldFP(exp2, V, Ty);
- case Intrinsic::sin:
- return ConstantFoldFP(sin, V, Ty);
- case Intrinsic::cos:
- return ConstantFoldFP(cos, V, Ty);
- case Intrinsic::sqrt:
- return ConstantFoldFP(sqrt, V, Ty);
- }
+ /// Currently APFloat versions of these functions do not exist, so we use
+ /// the host native double versions. Float versions are not called
+ /// directly but for all these it is true (float)(f((double)arg)) ==
+ /// f(arg). Long double not supported yet.
+ double V = getValueAsDouble(Op);
- if (!TLI)
- return nullptr;
+ switch (IntrinsicID) {
+ default: break;
+ case Intrinsic::fabs:
+ return ConstantFoldFP(fabs, V, Ty);
+ case Intrinsic::log2:
+ return ConstantFoldFP(Log2, V, Ty);
+ case Intrinsic::log:
+ return ConstantFoldFP(log, V, Ty);
+ case Intrinsic::log10:
+ return ConstantFoldFP(log10, V, Ty);
+ case Intrinsic::exp:
+ return ConstantFoldFP(exp, V, Ty);
+ case Intrinsic::exp2:
+ return ConstantFoldFP(exp2, V, Ty);
+ case Intrinsic::sin:
+ return ConstantFoldFP(sin, V, Ty);
+ case Intrinsic::cos:
+ return ConstantFoldFP(cos, V, Ty);
+ case Intrinsic::sqrt:
+ return ConstantFoldFP(sqrt, V, Ty);
+ }
- char NameKeyChar = Name[0];
- if (Name[0] == '_' && Name.size() > 2 && Name[1] == '_')
- NameKeyChar = Name[2];
-
- switch (NameKeyChar) {
- case 'a':
- if ((Name == "acos" && TLI->has(LibFunc_acos)) ||
- (Name == "acosf" && TLI->has(LibFunc_acosf)) ||
- (Name == "__acos_finite" && TLI->has(LibFunc_acos_finite)) ||
- (Name == "__acosf_finite" && TLI->has(LibFunc_acosf_finite)))
- return ConstantFoldFP(acos, V, Ty);
- else if ((Name == "asin" && TLI->has(LibFunc_asin)) ||
- (Name == "asinf" && TLI->has(LibFunc_asinf)) ||
- (Name == "__asin_finite" && TLI->has(LibFunc_asin_finite)) ||
- (Name == "__asinf_finite" && TLI->has(LibFunc_asinf_finite)))
- return ConstantFoldFP(asin, V, Ty);
- else if ((Name == "atan" && TLI->has(LibFunc_atan)) ||
- (Name == "atanf" && TLI->has(LibFunc_atanf)))
- return ConstantFoldFP(atan, V, Ty);
- break;
- case 'c':
- if ((Name == "ceil" && TLI->has(LibFunc_ceil)) ||
- (Name == "ceilf" && TLI->has(LibFunc_ceilf)))
- return ConstantFoldFP(ceil, V, Ty);
- else if ((Name == "cos" && TLI->has(LibFunc_cos)) ||
- (Name == "cosf" && TLI->has(LibFunc_cosf)))
- return ConstantFoldFP(cos, V, Ty);
- else if ((Name == "cosh" && TLI->has(LibFunc_cosh)) ||
- (Name == "coshf" && TLI->has(LibFunc_coshf)) ||
- (Name == "__cosh_finite" && TLI->has(LibFunc_cosh_finite)) ||
- (Name == "__coshf_finite" && TLI->has(LibFunc_coshf_finite)))
- return ConstantFoldFP(cosh, V, Ty);
- break;
- case 'e':
- if ((Name == "exp" && TLI->has(LibFunc_exp)) ||
- (Name == "expf" && TLI->has(LibFunc_expf)) ||
- (Name == "__exp_finite" && TLI->has(LibFunc_exp_finite)) ||
- (Name == "__expf_finite" && TLI->has(LibFunc_expf_finite)))
- return ConstantFoldFP(exp, V, Ty);
- if ((Name == "exp2" && TLI->has(LibFunc_exp2)) ||
- (Name == "exp2f" && TLI->has(LibFunc_exp2f)) ||
- (Name == "__exp2_finite" && TLI->has(LibFunc_exp2_finite)) ||
- (Name == "__exp2f_finite" && TLI->has(LibFunc_exp2f_finite)))
- // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
- // C99 library.
- return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
- break;
- case 'f':
- if ((Name == "fabs" && TLI->has(LibFunc_fabs)) ||
- (Name == "fabsf" && TLI->has(LibFunc_fabsf)))
- return ConstantFoldFP(fabs, V, Ty);
- else if ((Name == "floor" && TLI->has(LibFunc_floor)) ||
- (Name == "floorf" && TLI->has(LibFunc_floorf)))
- return ConstantFoldFP(floor, V, Ty);
- break;
- case 'l':
- if ((Name == "log" && V > 0 && TLI->has(LibFunc_log)) ||
- (Name == "logf" && V > 0 && TLI->has(LibFunc_logf)) ||
- (Name == "__log_finite" && V > 0 &&
- TLI->has(LibFunc_log_finite)) ||
- (Name == "__logf_finite" && V > 0 &&
- TLI->has(LibFunc_logf_finite)))
- return ConstantFoldFP(log, V, Ty);
- else if ((Name == "log10" && V > 0 && TLI->has(LibFunc_log10)) ||
- (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f)) ||
- (Name == "__log10_finite" && V > 0 &&
- TLI->has(LibFunc_log10_finite)) ||
- (Name == "__log10f_finite" && V > 0 &&
- TLI->has(LibFunc_log10f_finite)))
- return ConstantFoldFP(log10, V, Ty);
- break;
- case 'r':
- if ((Name == "round" && TLI->has(LibFunc_round)) ||
- (Name == "roundf" && TLI->has(LibFunc_roundf)))
- return ConstantFoldFP(round, V, Ty);
- break;
- case 's':
- if ((Name == "sin" && TLI->has(LibFunc_sin)) ||
- (Name == "sinf" && TLI->has(LibFunc_sinf)))
- return ConstantFoldFP(sin, V, Ty);
- else if ((Name == "sinh" && TLI->has(LibFunc_sinh)) ||
- (Name == "sinhf" && TLI->has(LibFunc_sinhf)) ||
- (Name == "__sinh_finite" && TLI->has(LibFunc_sinh_finite)) ||
- (Name == "__sinhf_finite" && TLI->has(LibFunc_sinhf_finite)))
- return ConstantFoldFP(sinh, V, Ty);
- else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc_sqrt)) ||
- (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc_sqrtf)))
- return ConstantFoldFP(sqrt, V, Ty);
- break;
- case 't':
- if ((Name == "tan" && TLI->has(LibFunc_tan)) ||
- (Name == "tanf" && TLI->has(LibFunc_tanf)))
- return ConstantFoldFP(tan, V, Ty);
- else if ((Name == "tanh" && TLI->has(LibFunc_tanh)) ||
- (Name == "tanhf" && TLI->has(LibFunc_tanhf)))
- return ConstantFoldFP(tanh, V, Ty);
- break;
- default:
- break;
- }
+ if (!TLI)
return nullptr;
- }
- if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
- switch (IntrinsicID) {
- case Intrinsic::bswap:
- return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap());
- case Intrinsic::ctpop:
- return ConstantInt::get(Ty, Op->getValue().countPopulation());
- case Intrinsic::bitreverse:
- return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits());
- case Intrinsic::convert_from_fp16: {
- APFloat Val(APFloat::IEEEhalf(), Op->getValue());
-
- bool lost = false;
- APFloat::opStatus status = Val.convert(
- Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost);
-
- // Conversion is always precise.
- (void)status;
- assert(status == APFloat::opOK && !lost &&
- "Precision lost during fp16 constfolding");
-
- return ConstantFP::get(Ty->getContext(), Val);
- }
- default:
- return nullptr;
- }
- }
+ char NameKeyChar = Name[0];
+ if (Name[0] == '_' && Name.size() > 2 && Name[1] == '_')
+ NameKeyChar = Name[2];
- // Support ConstantVector in case we have an Undef in the top.
- if (isa<ConstantVector>(Operands[0]) ||
- isa<ConstantDataVector>(Operands[0])) {
- auto *Op = cast<Constant>(Operands[0]);
- switch (IntrinsicID) {
- default: break;
- case Intrinsic::x86_sse_cvtss2si:
- case Intrinsic::x86_sse_cvtss2si64:
- case Intrinsic::x86_sse2_cvtsd2si:
- case Intrinsic::x86_sse2_cvtsd2si64:
- if (ConstantFP *FPOp =
- dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
- return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
- /*roundTowardZero=*/false, Ty,
- /*IsSigned*/true);
- break;
- case Intrinsic::x86_sse_cvttss2si:
- case Intrinsic::x86_sse_cvttss2si64:
- case Intrinsic::x86_sse2_cvttsd2si:
- case Intrinsic::x86_sse2_cvttsd2si64:
- if (ConstantFP *FPOp =
- dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
- return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
- /*roundTowardZero=*/true, Ty,
- /*IsSigned*/true);
- break;
- }
+ switch (NameKeyChar) {
+ case 'a':
+ if ((Name == "acos" && TLI->has(LibFunc_acos)) ||
+ (Name == "acosf" && TLI->has(LibFunc_acosf)) ||
+ (Name == "__acos_finite" && TLI->has(LibFunc_acos_finite)) ||
+ (Name == "__acosf_finite" && TLI->has(LibFunc_acosf_finite)))
+ return ConstantFoldFP(acos, V, Ty);
+ else if ((Name == "asin" && TLI->has(LibFunc_asin)) ||
+ (Name == "asinf" && TLI->has(LibFunc_asinf)) ||
+ (Name == "__asin_finite" && TLI->has(LibFunc_asin_finite)) ||
+ (Name == "__asinf_finite" && TLI->has(LibFunc_asinf_finite)))
+ return ConstantFoldFP(asin, V, Ty);
+ else if ((Name == "atan" && TLI->has(LibFunc_atan)) ||
+ (Name == "atanf" && TLI->has(LibFunc_atanf)))
+ return ConstantFoldFP(atan, V, Ty);
+ break;
+ case 'c':
+ if ((Name == "ceil" && TLI->has(LibFunc_ceil)) ||
+ (Name == "ceilf" && TLI->has(LibFunc_ceilf)))
+ return ConstantFoldFP(ceil, V, Ty);
+ else if ((Name == "cos" && TLI->has(LibFunc_cos)) ||
+ (Name == "cosf" && TLI->has(LibFunc_cosf)))
+ return ConstantFoldFP(cos, V, Ty);
+ else if ((Name == "cosh" && TLI->has(LibFunc_cosh)) ||
+ (Name == "coshf" && TLI->has(LibFunc_coshf)) ||
+ (Name == "__cosh_finite" && TLI->has(LibFunc_cosh_finite)) ||
+ (Name == "__coshf_finite" && TLI->has(LibFunc_coshf_finite)))
+ return ConstantFoldFP(cosh, V, Ty);
+ break;
+ case 'e':
+ if ((Name == "exp" && TLI->has(LibFunc_exp)) ||
+ (Name == "expf" && TLI->has(LibFunc_expf)) ||
+ (Name == "__exp_finite" && TLI->has(LibFunc_exp_finite)) ||
+ (Name == "__expf_finite" && TLI->has(LibFunc_expf_finite)))
+ return ConstantFoldFP(exp, V, Ty);
+ if ((Name == "exp2" && TLI->has(LibFunc_exp2)) ||
+ (Name == "exp2f" && TLI->has(LibFunc_exp2f)) ||
+ (Name == "__exp2_finite" && TLI->has(LibFunc_exp2_finite)) ||
+ (Name == "__exp2f_finite" && TLI->has(LibFunc_exp2f_finite)))
+ // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
+ // C99 library.
+ return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
+ break;
+ case 'f':
+ if ((Name == "fabs" && TLI->has(LibFunc_fabs)) ||
+ (Name == "fabsf" && TLI->has(LibFunc_fabsf)))
+ return ConstantFoldFP(fabs, V, Ty);
+ else if ((Name == "floor" && TLI->has(LibFunc_floor)) ||
+ (Name == "floorf" && TLI->has(LibFunc_floorf)))
+ return ConstantFoldFP(floor, V, Ty);
+ break;
+ case 'l':
+ if ((Name == "log" && V > 0 && TLI->has(LibFunc_log)) ||
+ (Name == "logf" && V > 0 && TLI->has(LibFunc_logf)) ||
+ (Name == "__log_finite" && V > 0 &&
+ TLI->has(LibFunc_log_finite)) ||
+ (Name == "__logf_finite" && V > 0 &&
+ TLI->has(LibFunc_logf_finite)))
+ return ConstantFoldFP(log, V, Ty);
+ else if ((Name == "log10" && V > 0 && TLI->has(LibFunc_log10)) ||
+ (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f)) ||
+ (Name == "__log10_finite" && V > 0 &&
+ TLI->has(LibFunc_log10_finite)) ||
+ (Name == "__log10f_finite" && V > 0 &&
+ TLI->has(LibFunc_log10f_finite)))
+ return ConstantFoldFP(log10, V, Ty);
+ break;
+ case 'r':
+ if ((Name == "round" && TLI->has(LibFunc_round)) ||
+ (Name == "roundf" && TLI->has(LibFunc_roundf)))
+ return ConstantFoldFP(round, V, Ty);
+ break;
+ case 's':
+ if ((Name == "sin" && TLI->has(LibFunc_sin)) ||
+ (Name == "sinf" && TLI->has(LibFunc_sinf)))
+ return ConstantFoldFP(sin, V, Ty);
+ else if ((Name == "sinh" && TLI->has(LibFunc_sinh)) ||
+ (Name == "sinhf" && TLI->has(LibFunc_sinhf)) ||
+ (Name == "__sinh_finite" && TLI->has(LibFunc_sinh_finite)) ||
+ (Name == "__sinhf_finite" && TLI->has(LibFunc_sinhf_finite)))
+ return ConstantFoldFP(sinh, V, Ty);
+ else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc_sqrt)) ||
+ (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc_sqrtf)))
+ return ConstantFoldFP(sqrt, V, Ty);
+ break;
+ case 't':
+ if ((Name == "tan" && TLI->has(LibFunc_tan)) ||
+ (Name == "tanf" && TLI->has(LibFunc_tanf)))
+ return ConstantFoldFP(tan, V, Ty);
+ else if ((Name == "tanh" && TLI->has(LibFunc_tanh)) ||
+ (Name == "tanhf" && TLI->has(LibFunc_tanhf)))
+ return ConstantFoldFP(tanh, V, Ty);
+ break;
+ default:
+ break;
}
-
return nullptr;
}
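
A note on the refactored function above: ConstantFoldScalarCall1 keys its
libcall switch on a single character of the callee name, skipping a leading
"__" so that the *_finite variants (e.g. "__exp_finite") land in the same
case as their plain counterparts. A minimal standalone sketch of that keying
in plain C++ -- nameKeyChar is a hypothetical name, not the LLVM API:

    #include <cassert>
    #include <string>

    // Mirrors the NameKeyChar logic: key on the first character, or on the
    // third when the name starts with "__". Assumes a non-empty name, as in
    // the caller.
    static char nameKeyChar(const std::string &Name) {
      if (Name.size() > 2 && Name[0] == '_' && Name[1] == '_')
        return Name[2];
      return Name[0];
    }

    int main() {
      assert(nameKeyChar("exp") == 'e');
      assert(nameKeyChar("__exp_finite") == 'e'); // finite variant, same case
      assert(nameKeyChar("sqrt") == 's');
      return 0;
    }
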
- if (Operands.size() == 2) {
- if (auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
- if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
- return nullptr;
- double Op1V = getValueAsDouble(Op1);
-
- if (auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
- if (Op2->getType() != Op1->getType())
- return nullptr;
+ if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
+ switch (IntrinsicID) {
+ case Intrinsic::bswap:
+ return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap());
+ case Intrinsic::ctpop:
+ return ConstantInt::get(Ty, Op->getValue().countPopulation());
+ case Intrinsic::bitreverse:
+ return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits());
+ case Intrinsic::convert_from_fp16: {
+ APFloat Val(APFloat::IEEEhalf(), Op->getValue());
+
+ bool lost = false;
+ APFloat::opStatus status = Val.convert(
+ Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost);
+
+ // Conversion is always precise.
+ (void)status;
+ assert(status == APFloat::opOK && !lost &&
+ "Precision lost during fp16 constfolding");
+
+ return ConstantFP::get(Ty->getContext(), Val);
+ }
+ default:
+ return nullptr;
+ }
+ }
- double Op2V = getValueAsDouble(Op2);
- if (IntrinsicID == Intrinsic::pow) {
- return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
- }
- if (IntrinsicID == Intrinsic::copysign) {
- APFloat V1 = Op1->getValueAPF();
- const APFloat &V2 = Op2->getValueAPF();
- V1.copySign(V2);
- return ConstantFP::get(Ty->getContext(), V1);
- }
+ // Support ConstantVector in case we have an Undef in the top.
+ if (isa<ConstantVector>(Operands[0]) ||
+ isa<ConstantDataVector>(Operands[0])) {
+ auto *Op = cast<Constant>(Operands[0]);
+ switch (IntrinsicID) {
+ default: break;
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/false, Ty,
+ /*IsSigned*/true);
+ break;
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/true, Ty,
+ /*IsSigned*/true);
+ break;
+ }
+ }
- if (IntrinsicID == Intrinsic::minnum) {
- const APFloat &C1 = Op1->getValueAPF();
- const APFloat &C2 = Op2->getValueAPF();
- return ConstantFP::get(Ty->getContext(), minnum(C1, C2));
- }
+ return nullptr;
+}
- if (IntrinsicID == Intrinsic::maxnum) {
- const APFloat &C1 = Op1->getValueAPF();
- const APFloat &C2 = Op2->getValueAPF();
- return ConstantFP::get(Ty->getContext(), maxnum(C1, C2));
- }
+static Constant *ConstantFoldScalarCall2(StringRef Name,
+ Intrinsic::ID IntrinsicID,
+ Type *Ty,
+ ArrayRef<Constant *> Operands,
+ const TargetLibraryInfo *TLI,
+ const CallBase *Call) {
+ assert(Operands.size() == 2 && "Wrong number of operands.");
- if (IntrinsicID == Intrinsic::minimum) {
- const APFloat &C1 = Op1->getValueAPF();
- const APFloat &C2 = Op2->getValueAPF();
- return ConstantFP::get(Ty->getContext(), minimum(C1, C2));
- }
+ if (auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
+ if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
+ return nullptr;
+ double Op1V = getValueAsDouble(Op1);
- if (IntrinsicID == Intrinsic::maximum) {
- const APFloat &C1 = Op1->getValueAPF();
- const APFloat &C2 = Op2->getValueAPF();
- return ConstantFP::get(Ty->getContext(), maximum(C1, C2));
- }
+ if (auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
+ if (Op2->getType() != Op1->getType())
+ return nullptr;
- if (!TLI)
- return nullptr;
- if ((Name == "pow" && TLI->has(LibFunc_pow)) ||
- (Name == "powf" && TLI->has(LibFunc_powf)) ||
- (Name == "__pow_finite" && TLI->has(LibFunc_pow_finite)) ||
- (Name == "__powf_finite" && TLI->has(LibFunc_powf_finite)))
- return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
- if ((Name == "fmod" && TLI->has(LibFunc_fmod)) ||
- (Name == "fmodf" && TLI->has(LibFunc_fmodf)))
- return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
- if ((Name == "atan2" && TLI->has(LibFunc_atan2)) ||
- (Name == "atan2f" && TLI->has(LibFunc_atan2f)) ||
- (Name == "__atan2_finite" && TLI->has(LibFunc_atan2_finite)) ||
- (Name == "__atan2f_finite" && TLI->has(LibFunc_atan2f_finite)))
- return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
- } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
- if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
- return ConstantFP::get(Ty->getContext(),
- APFloat((float)std::pow((float)Op1V,
- (int)Op2C->getZExtValue())));
- if (IntrinsicID == Intrinsic::powi && Ty->isFloatTy())
- return ConstantFP::get(Ty->getContext(),
- APFloat((float)std::pow((float)Op1V,
- (int)Op2C->getZExtValue())));
- if (IntrinsicID == Intrinsic::powi && Ty->isDoubleTy())
- return ConstantFP::get(Ty->getContext(),
- APFloat((double)std::pow((double)Op1V,
- (int)Op2C->getZExtValue())));
+ double Op2V = getValueAsDouble(Op2);
+ if (IntrinsicID == Intrinsic::pow) {
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+ }
+ if (IntrinsicID == Intrinsic::copysign) {
+ APFloat V1 = Op1->getValueAPF();
+ const APFloat &V2 = Op2->getValueAPF();
+ V1.copySign(V2);
+ return ConstantFP::get(Ty->getContext(), V1);
}
- return nullptr;
- }
- if (Operands[0]->getType()->isIntegerTy() &&
- Operands[1]->getType()->isIntegerTy()) {
- const APInt *C0, *C1;
- if (!getConstIntOrUndef(Operands[0], C0) ||
- !getConstIntOrUndef(Operands[1], C1))
- return nullptr;
+ if (IntrinsicID == Intrinsic::minnum) {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ return ConstantFP::get(Ty->getContext(), minnum(C1, C2));
+ }
- switch (IntrinsicID) {
- default: break;
- case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow:
- // Even if both operands are undef, we cannot fold muls to undef
- // in the general case. For example, on i2 there are no inputs
- // that would produce { i2 -1, i1 true } as the result.
- if (!C0 || !C1)
- return Constant::getNullValue(Ty);
- LLVM_FALLTHROUGH;
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::ssub_with_overflow:
- case Intrinsic::usub_with_overflow: {
- if (!C0 || !C1)
- return UndefValue::get(Ty);
+ if (IntrinsicID == Intrinsic::maxnum) {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ return ConstantFP::get(Ty->getContext(), maxnum(C1, C2));
+ }
- APInt Res;
- bool Overflow;
- switch (IntrinsicID) {
- default: llvm_unreachable("Invalid case");
- case Intrinsic::sadd_with_overflow:
- Res = C0->sadd_ov(*C1, Overflow);
- break;
- case Intrinsic::uadd_with_overflow:
- Res = C0->uadd_ov(*C1, Overflow);
- break;
- case Intrinsic::ssub_with_overflow:
- Res = C0->ssub_ov(*C1, Overflow);
- break;
- case Intrinsic::usub_with_overflow:
- Res = C0->usub_ov(*C1, Overflow);
- break;
- case Intrinsic::smul_with_overflow:
- Res = C0->smul_ov(*C1, Overflow);
- break;
- case Intrinsic::umul_with_overflow:
- Res = C0->umul_ov(*C1, Overflow);
- break;
- }
- Constant *Ops[] = {
- ConstantInt::get(Ty->getContext(), Res),
- ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
- };
- return ConstantStruct::get(cast<StructType>(Ty), Ops);
+ if (IntrinsicID == Intrinsic::minimum) {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ return ConstantFP::get(Ty->getContext(), minimum(C1, C2));
}
- case Intrinsic::uadd_sat:
- case Intrinsic::sadd_sat:
- if (!C0 && !C1)
- return UndefValue::get(Ty);
- if (!C0 || !C1)
- return Constant::getAllOnesValue(Ty);
- if (IntrinsicID == Intrinsic::uadd_sat)
- return ConstantInt::get(Ty, C0->uadd_sat(*C1));
- else
- return ConstantInt::get(Ty, C0->sadd_sat(*C1));
- case Intrinsic::usub_sat:
- case Intrinsic::ssub_sat:
- if (!C0 && !C1)
- return UndefValue::get(Ty);
- if (!C0 || !C1)
- return Constant::getNullValue(Ty);
- if (IntrinsicID == Intrinsic::usub_sat)
- return ConstantInt::get(Ty, C0->usub_sat(*C1));
- else
- return ConstantInt::get(Ty, C0->ssub_sat(*C1));
- case Intrinsic::cttz:
- case Intrinsic::ctlz:
- assert(C1 && "Must be constant int");
-
- // cttz(0, 1) and ctlz(0, 1) are undef.
- if (C1->isOneValue() && (!C0 || C0->isNullValue()))
- return UndefValue::get(Ty);
- if (!C0)
- return Constant::getNullValue(Ty);
- if (IntrinsicID == Intrinsic::cttz)
- return ConstantInt::get(Ty, C0->countTrailingZeros());
- else
- return ConstantInt::get(Ty, C0->countLeadingZeros());
+
+ if (IntrinsicID == Intrinsic::maximum) {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ return ConstantFP::get(Ty->getContext(), maximum(C1, C2));
}
- return nullptr;
+ if (!TLI)
+ return nullptr;
+ if ((Name == "pow" && TLI->has(LibFunc_pow)) ||
+ (Name == "powf" && TLI->has(LibFunc_powf)) ||
+ (Name == "__pow_finite" && TLI->has(LibFunc_pow_finite)) ||
+ (Name == "__powf_finite" && TLI->has(LibFunc_powf_finite)))
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+ if ((Name == "fmod" && TLI->has(LibFunc_fmod)) ||
+ (Name == "fmodf" && TLI->has(LibFunc_fmodf)))
+ return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
+ if ((Name == "atan2" && TLI->has(LibFunc_atan2)) ||
+ (Name == "atan2f" && TLI->has(LibFunc_atan2f)) ||
+ (Name == "__atan2_finite" && TLI->has(LibFunc_atan2_finite)) ||
+ (Name == "__atan2f_finite" && TLI->has(LibFunc_atan2f_finite)))
+ return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
+ } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
+ if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
+ return ConstantFP::get(Ty->getContext(),
+ APFloat((float)std::pow((float)Op1V,
+ (int)Op2C->getZExtValue())));
+ if (IntrinsicID == Intrinsic::powi && Ty->isFloatTy())
+ return ConstantFP::get(Ty->getContext(),
+ APFloat((float)std::pow((float)Op1V,
+ (int)Op2C->getZExtValue())));
+ if (IntrinsicID == Intrinsic::powi && Ty->isDoubleTy())
+ return ConstantFP::get(Ty->getContext(),
+ APFloat((double)std::pow((double)Op1V,
+ (int)Op2C->getZExtValue())));
}
+ return nullptr;
+ }
- // Support ConstantVector in case we have an Undef in the top.
- if ((isa<ConstantVector>(Operands[0]) ||
- isa<ConstantDataVector>(Operands[0])) &&
- // Check for default rounding mode.
- // FIXME: Support other rounding modes?
- isa<ConstantInt>(Operands[1]) &&
- cast<ConstantInt>(Operands[1])->getValue() == 4) {
- auto *Op = cast<Constant>(Operands[0]);
+ if (Operands[0]->getType()->isIntegerTy() &&
+ Operands[1]->getType()->isIntegerTy()) {
+ const APInt *C0, *C1;
+ if (!getConstIntOrUndef(Operands[0], C0) ||
+ !getConstIntOrUndef(Operands[1], C1))
+ return nullptr;
+
+ switch (IntrinsicID) {
+ default: break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ // Even if both operands are undef, we cannot fold muls to undef
+ // in the general case. For example, on i2 there are no inputs
+ // that would produce { i2 -1, i1 true } as the result.
+ if (!C0 || !C1)
+ return Constant::getNullValue(Ty);
+ LLVM_FALLTHROUGH;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow: {
+ if (!C0 || !C1)
+ return UndefValue::get(Ty);
+
+ APInt Res;
+ bool Overflow;
switch (IntrinsicID) {
- default: break;
- case Intrinsic::x86_avx512_vcvtss2si32:
- case Intrinsic::x86_avx512_vcvtss2si64:
- case Intrinsic::x86_avx512_vcvtsd2si32:
- case Intrinsic::x86_avx512_vcvtsd2si64:
- if (ConstantFP *FPOp =
- dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
- return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
- /*roundTowardZero=*/false, Ty,
- /*IsSigned*/true);
+ default: llvm_unreachable("Invalid case");
+ case Intrinsic::sadd_with_overflow:
+ Res = C0->sadd_ov(*C1, Overflow);
+ break;
+ case Intrinsic::uadd_with_overflow:
+ Res = C0->uadd_ov(*C1, Overflow);
break;
- case Intrinsic::x86_avx512_vcvtss2usi32:
- case Intrinsic::x86_avx512_vcvtss2usi64:
- case Intrinsic::x86_avx512_vcvtsd2usi32:
- case Intrinsic::x86_avx512_vcvtsd2usi64:
- if (ConstantFP *FPOp =
- dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
- return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
- /*roundTowardZero=*/false, Ty,
- /*IsSigned*/false);
+ case Intrinsic::ssub_with_overflow:
+ Res = C0->ssub_ov(*C1, Overflow);
+ break;
+ case Intrinsic::usub_with_overflow:
+ Res = C0->usub_ov(*C1, Overflow);
break;
- case Intrinsic::x86_avx512_cvttss2si:
- case Intrinsic::x86_avx512_cvttss2si64:
- case Intrinsic::x86_avx512_cvttsd2si:
- case Intrinsic::x86_avx512_cvttsd2si64:
- if (ConstantFP *FPOp =
- dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
- return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
- /*roundTowardZero=*/true, Ty,
- /*IsSigned*/true);
+ case Intrinsic::smul_with_overflow:
+ Res = C0->smul_ov(*C1, Overflow);
break;
- case Intrinsic::x86_avx512_cvttss2usi:
- case Intrinsic::x86_avx512_cvttss2usi64:
- case Intrinsic::x86_avx512_cvttsd2usi:
- case Intrinsic::x86_avx512_cvttsd2usi64:
- if (ConstantFP *FPOp =
- dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
- return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
- /*roundTowardZero=*/true, Ty,
- /*IsSigned*/false);
+ case Intrinsic::umul_with_overflow:
+ Res = C0->umul_ov(*C1, Overflow);
break;
}
+ Constant *Ops[] = {
+ ConstantInt::get(Ty->getContext(), Res),
+ ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
+ };
+ return ConstantStruct::get(cast<StructType>(Ty), Ops);
+ }
+ case Intrinsic::uadd_sat:
+ case Intrinsic::sadd_sat:
+ if (!C0 && !C1)
+ return UndefValue::get(Ty);
+ if (!C0 || !C1)
+ return Constant::getAllOnesValue(Ty);
+ if (IntrinsicID == Intrinsic::uadd_sat)
+ return ConstantInt::get(Ty, C0->uadd_sat(*C1));
+ else
+ return ConstantInt::get(Ty, C0->sadd_sat(*C1));
+ case Intrinsic::usub_sat:
+ case Intrinsic::ssub_sat:
+ if (!C0 && !C1)
+ return UndefValue::get(Ty);
+ if (!C0 || !C1)
+ return Constant::getNullValue(Ty);
+ if (IntrinsicID == Intrinsic::usub_sat)
+ return ConstantInt::get(Ty, C0->usub_sat(*C1));
+ else
+ return ConstantInt::get(Ty, C0->ssub_sat(*C1));
+ case Intrinsic::cttz:
+ case Intrinsic::ctlz:
+ assert(C1 && "Must be constant int");
+
+ // cttz(0, 1) and ctlz(0, 1) are undef.
+ if (C1->isOneValue() && (!C0 || C0->isNullValue()))
+ return UndefValue::get(Ty);
+ if (!C0)
+ return Constant::getNullValue(Ty);
+ if (IntrinsicID == Intrinsic::cttz)
+ return ConstantInt::get(Ty, C0->countTrailingZeros());
+ else
+ return ConstantInt::get(Ty, C0->countLeadingZeros());
}
+
return nullptr;
}
- if (Operands.size() != 3)
- return nullptr;
+ // Support ConstantVector in case we have an Undef in the top.
+ if ((isa<ConstantVector>(Operands[0]) ||
+ isa<ConstantDataVector>(Operands[0])) &&
+ // Check for default rounding mode.
+ // FIXME: Support other rounding modes?
+ isa<ConstantInt>(Operands[1]) &&
+ cast<ConstantInt>(Operands[1])->getValue() == 4) {
+ auto *Op = cast<Constant>(Operands[0]);
+ switch (IntrinsicID) {
+ default: break;
+ case Intrinsic::x86_avx512_vcvtss2si32:
+ case Intrinsic::x86_avx512_vcvtss2si64:
+ case Intrinsic::x86_avx512_vcvtsd2si32:
+ case Intrinsic::x86_avx512_vcvtsd2si64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/false, Ty,
+ /*IsSigned*/true);
+ break;
+ case Intrinsic::x86_avx512_vcvtss2usi32:
+ case Intrinsic::x86_avx512_vcvtss2usi64:
+ case Intrinsic::x86_avx512_vcvtsd2usi32:
+ case Intrinsic::x86_avx512_vcvtsd2usi64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/false, Ty,
+ /*IsSigned*/false);
+ break;
+ case Intrinsic::x86_avx512_cvttss2si:
+ case Intrinsic::x86_avx512_cvttss2si64:
+ case Intrinsic::x86_avx512_cvttsd2si:
+ case Intrinsic::x86_avx512_cvttsd2si64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/true, Ty,
+ /*IsSigned*/true);
+ break;
+ case Intrinsic::x86_avx512_cvttss2usi:
+ case Intrinsic::x86_avx512_cvttss2usi64:
+ case Intrinsic::x86_avx512_cvttsd2usi:
+ case Intrinsic::x86_avx512_cvttsd2usi64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/true, Ty,
+ /*IsSigned*/false);
+ break;
+ }
+ }
+ return nullptr;
+}
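
For the *_with_overflow folds in the integer path above: the result is a
two-element struct constant carrying the wrapped value and the overflow bit,
mirroring what the intrinsic produces at run time. A host-side analogue of
the i8 signed-add case, assuming a GCC/Clang host for __builtin_add_overflow:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int8_t Res;
      // sadd.with.overflow(i8 127, i8 1) folds to { i8 -128, i1 true }.
      bool Overflow = __builtin_add_overflow(int8_t(127), int8_t(1), &Res);
      std::printf("{ %d, %s }\n", Res, Overflow ? "true" : "false");
      return 0;
    }
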
+
+static Constant *ConstantFoldScalarCall3(StringRef Name,
+ Intrinsic::ID IntrinsicID,
+ Type *Ty,
+ ArrayRef<Constant *> Operands,
+ const TargetLibraryInfo *TLI,
+ const CallBase *Call) {
+ assert(Operands.size() == 3 && "Wrong number of operands.");
if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
@@ -2179,6 +2207,43 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
}
}
+ if (const auto *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
+ if (const auto *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
+ if (const auto *Op3 = dyn_cast<ConstantInt>(Operands[2])) {
+ switch (IntrinsicID) {
+ default: break;
+ case Intrinsic::smul_fix:
+ case Intrinsic::smul_fix_sat: {
+ // This code performs rounding towards negative infinity in case the
+ // result cannot be represented exactly for the given scale. Targets
+ // that do care about rounding should use a target hook for specifying
+ // how rounding should be done, and provide their own folding to be
+ // consistent with rounding. This is the same approach as used by
+ // DAGTypeLegalizer::ExpandIntRes_MULFIX.
+ APInt Lhs = Op1->getValue();
+ APInt Rhs = Op2->getValue();
+ unsigned Scale = Op3->getValue().getZExtValue();
+ unsigned Width = Lhs.getBitWidth();
+ assert(Scale < Width && "Illegal scale.");
+ unsigned ExtendedWidth = Width * 2;
+ APInt Product = (Lhs.sextOrSelf(ExtendedWidth) *
+ Rhs.sextOrSelf(ExtendedWidth)).ashr(Scale);
+ if (IntrinsicID == Intrinsic::smul_fix_sat) {
+ APInt MaxValue =
+ APInt::getSignedMaxValue(Width).sextOrSelf(ExtendedWidth);
+ APInt MinValue =
+ APInt::getSignedMinValue(Width).sextOrSelf(ExtendedWidth);
+ Product = APIntOps::smin(Product, MaxValue);
+ Product = APIntOps::smax(Product, MinValue);
+ }
+ return ConstantInt::get(Ty->getContext(),
+ Product.sextOrTrunc(Width));
+ }
+ }
+ }
+ }
+ }
+
if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
const APInt *C0, *C1, *C2;
if (!getConstIntOrUndef(Operands[0], C0) ||
@@ -2212,11 +2277,31 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
return nullptr;
}
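
A worked instance of the smul_fix fold above, using the rounding behavior the
comment describes: sign-extend to double width, multiply, arithmetic-shift
right by the scale, truncate. A self-contained sketch for i8, assuming the
usual arithmetic right shift on signed integers that mainstream hosts provide:

    #include <cstdint>
    #include <cstdio>

    // smul.fix(a, b, Scale) on i8: (sext(a) * sext(b)) >> Scale, truncated.
    static int8_t smulFixI8(int8_t A, int8_t B, unsigned Scale) {
      int16_t Product = int16_t(A) * int16_t(B); // double-width product
      return int8_t(Product >> Scale);           // ashr rounds toward -inf
    }

    int main() {
      // With Scale = 2, the bit pattern 6 encodes 1.5; 1.5 * 1.5 = 2.25,
      // which encodes as 9.
      std::printf("%d\n", smulFixI8(6, 6, 2)); // prints 9
      return 0;
    }
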
-Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
- VectorType *VTy, ArrayRef<Constant *> Operands,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- ImmutableCallSite CS) {
+static Constant *ConstantFoldScalarCall(StringRef Name,
+ Intrinsic::ID IntrinsicID,
+ Type *Ty,
+ ArrayRef<Constant *> Operands,
+ const TargetLibraryInfo *TLI,
+ const CallBase *Call) {
+ if (Operands.size() == 1)
+ return ConstantFoldScalarCall1(Name, IntrinsicID, Ty, Operands, TLI, Call);
+
+ if (Operands.size() == 2)
+ return ConstantFoldScalarCall2(Name, IntrinsicID, Ty, Operands, TLI, Call);
+
+ if (Operands.size() == 3)
+ return ConstantFoldScalarCall3(Name, IntrinsicID, Ty, Operands, TLI, Call);
+
+ return nullptr;
+}
+
+static Constant *ConstantFoldVectorCall(StringRef Name,
+ Intrinsic::ID IntrinsicID,
+ VectorType *VTy,
+ ArrayRef<Constant *> Operands,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const CallBase *Call) {
SmallVector<Constant *, 4> Result(VTy->getNumElements());
SmallVector<Constant *, 4> Lane(Operands.size());
Type *Ty = VTy->getElementType();
@@ -2263,10 +2348,8 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) {
// Gather a column of constants.
for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
- // These intrinsics use a scalar type for their second argument.
- if (J == 1 &&
- (IntrinsicID == Intrinsic::cttz || IntrinsicID == Intrinsic::ctlz ||
- IntrinsicID == Intrinsic::powi)) {
+ // Some intrinsics use a scalar type for certain arguments.
+ if (hasVectorInstrinsicScalarOpd(IntrinsicID, J)) {
Lane[J] = Operands[J];
continue;
}
@@ -2279,7 +2362,8 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
}
// Use the regular scalar folding to simplify this column.
- Constant *Folded = ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, CS);
+ Constant *Folded =
+ ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, Call);
if (!Folded)
return nullptr;
Result[I] = Folded;
@@ -2290,11 +2374,10 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
} // end anonymous namespace
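
On the lane loop in the hunk above: it replaces a hard-coded "operand 1 of
cttz/ctlz/powi is scalar" test with the shared hasVectorInstrinsicScalarOpd
predicate from VectorUtils, so future intrinsics with scalar operands fold
without touching this loop. A standalone stand-in for the old check, with a
made-up enum in place of Intrinsic::ID:

    #include <cassert>

    enum class IntrinsicID { cttz, ctlz, powi, bswap };

    // Stand-in predicate: these intrinsics take a scalar second operand
    // shared by every vector lane, so the per-lane fold passes it through
    // instead of extracting an element from it.
    static bool hasScalarOperand(IntrinsicID ID, unsigned OpIdx) {
      return OpIdx == 1 && (ID == IntrinsicID::cttz ||
                            ID == IntrinsicID::ctlz ||
                            ID == IntrinsicID::powi);
    }

    int main() {
      assert(hasScalarOperand(IntrinsicID::powi, 1));
      assert(!hasScalarOperand(IntrinsicID::bswap, 1));
      return 0;
    }
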
-Constant *
-llvm::ConstantFoldCall(ImmutableCallSite CS, Function *F,
- ArrayRef<Constant *> Operands,
- const TargetLibraryInfo *TLI) {
- if (CS.isNoBuiltin() || CS.isStrictFP())
+Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
+ ArrayRef<Constant *> Operands,
+ const TargetLibraryInfo *TLI) {
+ if (Call->isNoBuiltin() || Call->isStrictFP())
return nullptr;
if (!F->hasName())
return nullptr;
@@ -2304,17 +2387,19 @@ llvm::ConstantFoldCall(ImmutableCallSite CS, Function *F,
if (auto *VTy = dyn_cast<VectorType>(Ty))
return ConstantFoldVectorCall(Name, F->getIntrinsicID(), VTy, Operands,
- F->getParent()->getDataLayout(), TLI, CS);
+ F->getParent()->getDataLayout(), TLI, Call);
- return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI, CS);
+ return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI,
+ Call);
}
-bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
+bool llvm::isMathLibCallNoop(const CallBase *Call,
+ const TargetLibraryInfo *TLI) {
// FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap
// (and to some extent ConstantFoldScalarCall).
- if (CS.isNoBuiltin() || CS.isStrictFP())
+ if (Call->isNoBuiltin() || Call->isStrictFP())
return false;
- Function *F = CS.getCalledFunction();
+ Function *F = Call->getCalledFunction();
if (!F)
return false;
@@ -2322,8 +2407,8 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
if (!TLI || !TLI->getLibFunc(*F, Func))
return false;
- if (CS.getNumArgOperands() == 1) {
- if (ConstantFP *OpC = dyn_cast<ConstantFP>(CS.getArgOperand(0))) {
+ if (Call->getNumArgOperands() == 1) {
+ if (ConstantFP *OpC = dyn_cast<ConstantFP>(Call->getArgOperand(0))) {
const APFloat &Op = OpC->getValueAPF();
switch (Func) {
case LibFunc_logl:
@@ -2421,9 +2506,9 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
}
}
- if (CS.getNumArgOperands() == 2) {
- ConstantFP *Op0C = dyn_cast<ConstantFP>(CS.getArgOperand(0));
- ConstantFP *Op1C = dyn_cast<ConstantFP>(CS.getArgOperand(1));
+ if (Call->getNumArgOperands() == 2) {
+ ConstantFP *Op0C = dyn_cast<ConstantFP>(Call->getArgOperand(0));
+ ConstantFP *Op1C = dyn_cast<ConstantFP>(Call->getArgOperand(1));
if (Op0C && Op1C) {
const APFloat &Op0 = Op0C->getValueAPF();
const APFloat &Op1 = Op1C->getValueAPF();
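
These blocks feed switches (partly elided by the diff) that check whether the
constant arguments stay inside each libm function's domain; when they do, the
call cannot set errno or raise a floating-point exception, so it is a
removable no-op. An illustrative host-side check for the one-argument log
case, simplified from the real switch:

    #include <cstdio>

    // Illustrative only: log(c) for a constant c is a harmless no-op exactly
    // when c is strictly positive -- log(0) is a pole error and log of a
    // negative value a domain error, which deleting the call would hide.
    static bool logCallIsNoop(double C) { return C > 0.0; }

    int main() {
      std::printf("%d %d\n", logCallIsNoop(2.0), logCallIsNoop(-1.0));
      return 0;
    }
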
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index 3d55bf20bb40..bf0cdbfd0c8b 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -1,9 +1,8 @@
//===- CostModel.cpp ------ Cost Model Analysis ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp
index 4cafb7da16d3..c1043e446beb 100644
--- a/lib/Analysis/Delinearization.cpp
+++ b/lib/Analysis/Delinearization.cpp
@@ -1,9 +1,8 @@
//===---- Delinearization.cpp - MultiDimensional Index Delinearization ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp
index 34f785fb02be..01b8ff10d355 100644
--- a/lib/Analysis/DemandedBits.cpp
+++ b/lib/Analysis/DemandedBits.cpp
@@ -1,9 +1,8 @@
//===- DemandedBits.cpp - Determine demanded bits -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -340,6 +339,8 @@ void DemandedBits::performAnalysis() {
Type *T = J->getType();
if (T->isIntOrIntVectorTy())
AliveBits[J] = APInt::getAllOnesValue(T->getScalarSizeInBits());
+ else
+ Visited.insert(J);
Worklist.insert(J);
}
}
@@ -355,16 +356,18 @@ void DemandedBits::performAnalysis() {
LLVM_DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI);
APInt AOut;
+ bool InputIsKnownDead = false;
if (UserI->getType()->isIntOrIntVectorTy()) {
AOut = AliveBits[UserI];
LLVM_DEBUG(dbgs() << " Alive Out: 0x"
<< Twine::utohexstr(AOut.getLimitedValue()));
+
+ // If all bits of the output are dead, then all bits of the input
+ // are also dead.
+ InputIsKnownDead = !AOut && !isAlwaysLive(UserI);
}
LLVM_DEBUG(dbgs() << "\n");
- if (!UserI->getType()->isIntOrIntVectorTy())
- Visited.insert(UserI);
-
KnownBits Known, Known2;
bool KnownBitsComputed = false;
// Compute the set of alive bits for each operand. These are anded into the
@@ -381,10 +384,7 @@ void DemandedBits::performAnalysis() {
if (T->isIntOrIntVectorTy()) {
unsigned BitWidth = T->getScalarSizeInBits();
APInt AB = APInt::getAllOnesValue(BitWidth);
- if (UserI->getType()->isIntOrIntVectorTy() && !AOut &&
- !isAlwaysLive(UserI)) {
- // If all bits of the output are dead, then all bits of the input
- // are also dead.
+ if (InputIsKnownDead) {
AB = APInt(BitWidth, 0);
} else {
// Bits of each operand that are used to compute alive bits of the
@@ -403,18 +403,13 @@ void DemandedBits::performAnalysis() {
// If we've added to the set of alive bits (or the operand has not
// been previously visited), then re-queue the operand to be visited
// again.
- APInt ABPrev(BitWidth, 0);
- auto ABI = AliveBits.find(I);
- if (ABI != AliveBits.end())
- ABPrev = ABI->second;
-
- APInt ABNew = AB | ABPrev;
- if (ABNew != ABPrev || ABI == AliveBits.end()) {
- AliveBits[I] = std::move(ABNew);
+ auto Res = AliveBits.try_emplace(I);
+ if (Res.second || (AB |= Res.first->second) != Res.first->second) {
+ Res.first->second = std::move(AB);
Worklist.insert(I);
}
}
- } else if (I && !Visited.count(I)) {
+ } else if (I && Visited.insert(I).second) {
Worklist.insert(I);
}
}
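
The last hunk compresses the find/compare/store sequence into a single
try_emplace: default-insert a zero entry, OR the freshly computed bits in,
and requeue only when the stored set actually grows. A standalone sketch of
the same idiom over std::map (illustrative names, not the LLVM types;
requires C++17 for try_emplace):

    #include <cstdint>
    #include <map>
    #include <vector>

    int main() {
      std::map<int, uint64_t> AliveBits;
      std::vector<int> Worklist;

      auto update = [&](int Key, uint64_t AB) {
        auto Res = AliveBits.try_emplace(Key); // default-inserts 0 if absent
        // Requeue when the entry is new or OR-ing in AB added a bit.
        if (Res.second || (AB |= Res.first->second) != Res.first->second) {
          Res.first->second = AB;
          Worklist.push_back(Key);
        }
      };

      update(1, 0b0011); // new entry: requeued
      update(1, 0b0001); // subset of known bits: no requeue
      update(1, 0b0100); // grows the set to 0b0111: requeued
      return 0;
    }
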
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index 3f4dfa52e1da..75f269e84f9d 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -1,9 +1,8 @@
//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -110,6 +109,14 @@ STATISTIC(BanerjeeSuccesses, "Banerjee successes");
static cl::opt<bool>
Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore,
cl::desc("Try to delinearize array references."));
+static cl::opt<bool> DisableDelinearizationChecks(
+ "da-disable-delinearization-checks", cl::init(false), cl::Hidden,
+ cl::ZeroOrMore,
+ cl::desc(
+ "Disable checks that try to statically verify validity of "
+ "delinearized subscripts. Enabling this option may result in incorrect "
+ "dependence vectors for languages that allow the subscript of one "
+ "dimension to underflow or overflow into another dimension."));
//===----------------------------------------------------------------------===//
// basics
@@ -3317,19 +3324,20 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
// and dst.
// FIXME: It may be better to record these sizes and add them as constraints
// to the dependency checks.
- for (int i = 1; i < size; ++i) {
- if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr))
- return false;
+ if (!DisableDelinearizationChecks)
+ for (int i = 1; i < size; ++i) {
+ if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr))
+ return false;
- if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1]))
- return false;
+ if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1]))
+ return false;
- if (!isKnownNonNegative(DstSubscripts[i], DstPtr))
- return false;
+ if (!isKnownNonNegative(DstSubscripts[i], DstPtr))
+ return false;
- if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1]))
- return false;
- }
+ if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1]))
+ return false;
+ }
LLVM_DEBUG({
dbgs() << "\nSrcSubscripts: ";
@@ -3369,6 +3377,19 @@ static void dumpSmallBitVector(SmallBitVector &BV) {
}
#endif
+bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ // Check if the analysis itself has been invalidated.
+ auto PAC = PA.getChecker<DependenceAnalysis>();
+ if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
+ return true;
+
+ // Check transitive dependencies.
+ return Inv.invalidate<AAManager>(F, PA) ||
+ Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
+ Inv.invalidate<LoopAnalysis>(F, PA);
+}
+
// depends -
// Returns NULL if there is no dependence.
// Otherwise, return a Dependence with as many details as possible.
@@ -3510,7 +3531,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
// to either Separable or Coupled).
//
// Next, we consider 1 and 2. The intersection of the GroupLoops is empty.
- // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty,
+ // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty,
// so Pair[3].Group = {0, 1, 3} and Done = false.
//
// Next, we compare 2 against 3. The intersection of the GroupLoops is empty.
diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp
index 7ba23854a3cc..0ccd59ef2bfd 100644
--- a/lib/Analysis/DivergenceAnalysis.cpp
+++ b/lib/Analysis/DivergenceAnalysis.cpp
@@ -1,9 +1,8 @@
//===- DivergenceAnalysis.cpp --------- Divergence Analysis Implementation -==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp
index 8abc0e7d0df9..d9f43dd746ef 100644
--- a/lib/Analysis/DomPrinter.cpp
+++ b/lib/Analysis/DomPrinter.cpp
@@ -1,9 +1,8 @@
//===- DomPrinter.cpp - DOT printer for the dominance trees ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/DomTreeUpdater.cpp b/lib/Analysis/DomTreeUpdater.cpp
new file mode 100644
index 000000000000..49215889cfd6
--- /dev/null
+++ b/lib/Analysis/DomTreeUpdater.cpp
@@ -0,0 +1,533 @@
+//===- DomTreeUpdater.cpp - DomTree/Post DomTree Updater --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the DomTreeUpdater class, which provides a uniform way
+// to update dominator tree related data structures.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/Support/GenericDomTree.h"
+#include <algorithm>
+#include <functional>
+#include <utility>
+
+namespace llvm {
+
+bool DomTreeUpdater::isUpdateValid(
+ const DominatorTree::UpdateType Update) const {
+ const auto *From = Update.getFrom();
+ const auto *To = Update.getTo();
+ const auto Kind = Update.getKind();
+
+ // Discard updates by inspecting the current state of successors of From.
+ // Since isUpdateValid() must be called *after* the Terminator of From is
+ // altered we can determine if the update is unnecessary for batch updates
+ // or invalid for a single update.
+ const bool HasEdge = llvm::any_of(
+ successors(From), [To](const BasicBlock *B) { return B == To; });
+
+ // If the IR does not match the update,
+ // 1. In batch updates, this update is unnecessary.
+ // 2. When called by insertEdge*()/deleteEdge*(), this update is invalid.
+ // Edge does not exist in IR.
+ if (Kind == DominatorTree::Insert && !HasEdge)
+ return false;
+
+ // Edge exists in IR.
+ if (Kind == DominatorTree::Delete && HasEdge)
+ return false;
+
+ return true;
+}
+
+bool DomTreeUpdater::isSelfDominance(
+ const DominatorTree::UpdateType Update) const {
+ // Won't affect DomTree and PostDomTree.
+ return Update.getFrom() == Update.getTo();
+}
+
+void DomTreeUpdater::applyDomTreeUpdates() {
+ // No pending DomTreeUpdates.
+ if (Strategy != UpdateStrategy::Lazy || !DT)
+ return;
+
+ // Only apply updates not yet applied by DomTree.
+ if (hasPendingDomTreeUpdates()) {
+ const auto I = PendUpdates.begin() + PendDTUpdateIndex;
+ const auto E = PendUpdates.end();
+ assert(I < E && "Iterator range invalid; there should be DomTree updates.");
+ DT->applyUpdates(ArrayRef<DominatorTree::UpdateType>(I, E));
+ PendDTUpdateIndex = PendUpdates.size();
+ }
+}
+
+void DomTreeUpdater::flush() {
+ applyDomTreeUpdates();
+ applyPostDomTreeUpdates();
+ dropOutOfDateUpdates();
+}
+
+void DomTreeUpdater::applyPostDomTreeUpdates() {
+ // No pending PostDomTreeUpdates.
+ if (Strategy != UpdateStrategy::Lazy || !PDT)
+ return;
+
+ // Only apply updates not yet applied by PostDomTree.
+ if (hasPendingPostDomTreeUpdates()) {
+ const auto I = PendUpdates.begin() + PendPDTUpdateIndex;
+ const auto E = PendUpdates.end();
+ assert(I < E &&
+ "Iterator range invalid; there should be PostDomTree updates.");
+ PDT->applyUpdates(ArrayRef<DominatorTree::UpdateType>(I, E));
+ PendPDTUpdateIndex = PendUpdates.size();
+ }
+}
+
+void DomTreeUpdater::tryFlushDeletedBB() {
+ if (!hasPendingUpdates())
+ forceFlushDeletedBB();
+}
+
+bool DomTreeUpdater::forceFlushDeletedBB() {
+ if (DeletedBBs.empty())
+ return false;
+
+ for (auto *BB : DeletedBBs) {
+ // After calling deleteBB or callbackDeleteBB under Lazy UpdateStrategy,
+ // validateDeleteBB() removes all instructions of DelBB and adds an
+ // UnreachableInst as its terminator. So we check that the BasicBlock to
+ // delete contains only an UnreachableInst.
+ assert(BB->getInstList().size() == 1 &&
+ isa<UnreachableInst>(BB->getTerminator()) &&
+ "DelBB has been modified while awaiting deletion.");
+ BB->removeFromParent();
+ eraseDelBBNode(BB);
+ delete BB;
+ }
+ DeletedBBs.clear();
+ Callbacks.clear();
+ return true;
+}
+
+void DomTreeUpdater::recalculate(Function &F) {
+
+ if (Strategy == UpdateStrategy::Eager) {
+ if (DT)
+ DT->recalculate(F);
+ if (PDT)
+ PDT->recalculate(F);
+ return;
+ }
+
+ // There is little performance gain if we pend the recalculation under
+ // Lazy UpdateStrategy, so we recalculate available trees immediately.
+
+ // Prevent forceFlushDeletedBB() from erasing DomTree or PostDomTree nodes.
+ IsRecalculatingDomTree = IsRecalculatingPostDomTree = true;
+
+ // Because all trees are going to be up-to-date after recalculation,
+ // flush awaiting deleted BasicBlocks.
+ forceFlushDeletedBB();
+ if (DT)
+ DT->recalculate(F);
+ if (PDT)
+ PDT->recalculate(F);
+
+ // Resume forceFlushDeletedBB() to erase DomTree or PostDomTree nodes.
+ IsRecalculatingDomTree = IsRecalculatingPostDomTree = false;
+ PendDTUpdateIndex = PendPDTUpdateIndex = PendUpdates.size();
+ dropOutOfDateUpdates();
+}
+
+bool DomTreeUpdater::hasPendingUpdates() const {
+ return hasPendingDomTreeUpdates() || hasPendingPostDomTreeUpdates();
+}
+
+bool DomTreeUpdater::hasPendingDomTreeUpdates() const {
+ if (!DT)
+ return false;
+ return PendUpdates.size() != PendDTUpdateIndex;
+}
+
+bool DomTreeUpdater::hasPendingPostDomTreeUpdates() const {
+ if (!PDT)
+ return false;
+ return PendUpdates.size() != PendPDTUpdateIndex;
+}
+
+bool DomTreeUpdater::isBBPendingDeletion(llvm::BasicBlock *DelBB) const {
+ if (Strategy == UpdateStrategy::Eager || DeletedBBs.empty())
+ return false;
+ return DeletedBBs.count(DelBB) != 0;
+}
+
+// The DT and PDT require that the nodes related to updates
+// are not deleted when update functions are called.
+// So BasicBlock deletions must be pended when the
+// UpdateStrategy is Lazy. When the UpdateStrategy is
+// Eager, the BasicBlock will be deleted immediately.
+void DomTreeUpdater::deleteBB(BasicBlock *DelBB) {
+ validateDeleteBB(DelBB);
+ if (Strategy == UpdateStrategy::Lazy) {
+ DeletedBBs.insert(DelBB);
+ return;
+ }
+
+ DelBB->removeFromParent();
+ eraseDelBBNode(DelBB);
+ delete DelBB;
+}
+
+void DomTreeUpdater::callbackDeleteBB(
+ BasicBlock *DelBB, std::function<void(BasicBlock *)> Callback) {
+ validateDeleteBB(DelBB);
+ if (Strategy == UpdateStrategy::Lazy) {
+ Callbacks.push_back(CallBackOnDeletion(DelBB, Callback));
+ DeletedBBs.insert(DelBB);
+ return;
+ }
+
+ DelBB->removeFromParent();
+ eraseDelBBNode(DelBB);
+ Callback(DelBB);
+ delete DelBB;
+}
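
Under the Lazy strategy the two functions above only queue the block:
validateDeleteBB strips its body down to a lone UnreachableInst, and the
erase happens once pending updates no longer reference the node. A
hypothetical caller -- Pred, DeadBB, DT, and PDT are assumed to exist; the
DTU calls are the ones defined in this file:

    // Sketch, not a complete program.
    DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy);
    DTU.applyUpdates({{DominatorTree::Delete, Pred, DeadBB}});
    DTU.callbackDeleteBB(DeadBB, [](BasicBlock *BB) {
      // Runs for the queued block, e.g. to purge BB from side tables.
    });
    DTU.flush(); // applies pending updates, then the deferred deletion
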
+
+void DomTreeUpdater::eraseDelBBNode(BasicBlock *DelBB) {
+ if (DT && !IsRecalculatingDomTree)
+ if (DT->getNode(DelBB))
+ DT->eraseNode(DelBB);
+
+ if (PDT && !IsRecalculatingPostDomTree)
+ if (PDT->getNode(DelBB))
+ PDT->eraseNode(DelBB);
+}
+
+void DomTreeUpdater::validateDeleteBB(BasicBlock *DelBB) {
+ assert(DelBB && "Invalid push_back of nullptr DelBB.");
+ assert(pred_empty(DelBB) && "DelBB has one or more predecessors.");
+ // DelBB is unreachable and all its instructions are dead.
+ while (!DelBB->empty()) {
+ Instruction &I = DelBB->back();
+ // Replace used instructions with an arbitrary value (undef).
+ if (!I.use_empty())
+ I.replaceAllUsesWith(llvm::UndefValue::get(I.getType()));
+ DelBB->getInstList().pop_back();
+ }
+ // Make sure DelBB has a valid terminator instruction. As long as DelBB is a
+ // child of Function F, it must contain valid IR.
+ new UnreachableInst(DelBB->getContext(), DelBB);
+}
+
+void DomTreeUpdater::applyUpdates(ArrayRef<DominatorTree::UpdateType> Updates) {
+ if (!DT && !PDT)
+ return;
+
+ if (Strategy == UpdateStrategy::Lazy) {
+ for (const auto U : Updates)
+ if (!isSelfDominance(U))
+ PendUpdates.push_back(U);
+
+ return;
+ }
+
+ if (DT)
+ DT->applyUpdates(Updates);
+ if (PDT)
+ PDT->applyUpdates(Updates);
+}
+
+void DomTreeUpdater::applyUpdatesPermissive(
+ ArrayRef<DominatorTree::UpdateType> Updates) {
+ if (!DT && !PDT)
+ return;
+
+ SmallSet<std::pair<BasicBlock *, BasicBlock *>, 8> Seen;
+ SmallVector<DominatorTree::UpdateType, 8> DeduplicatedUpdates;
+ for (const auto U : Updates) {
+ auto Edge = std::make_pair(U.getFrom(), U.getTo());
+ // Because it is illegal to submit updates that have already been applied
+ // and updates to an edge need to be strictly ordered,
+ // it is safe to infer the existence of an edge from the first update
+ // to this edge.
+ // If the first update to an edge is "Delete", it means that the edge
+ // existed before. If the first update to an edge is "Insert", it means
+ // that the edge didn't exist before.
+ //
+ // For example, if the user submits {{Delete, A, B}, {Insert, A, B}},
+ // because
+ // 1. it is illegal to submit updates that have already been applied,
+ // i.e., the user cannot delete a nonexistent edge,
+ // 2. updates to an edge need to be strictly ordered,
+ // So, initially edge A -> B existed.
+ // We can then safely ignore future updates to this edge and directly
+ // inspect the current CFG:
+ // a. If the edge still exists, then because the user cannot insert an
+ // existing edge, both {Delete, A, B} and {Insert, A, B} actually happened
+ // and resulted in a no-op. DTU won't submit any update in this case.
+ // b. If the edge doesn't exist, we can then infer that {Delete, A, B}
+ // actually happened but {Insert, A, B} was an invalid update which never
+ // happened. DTU will submit {Delete, A, B} in this case.
+ if (!isSelfDominance(U) && Seen.count(Edge) == 0) {
+ Seen.insert(Edge);
+ // If the update doesn't appear in the CFG, it means that
+ // either the change wasn't made or the relevant operations
+ // resulted in a no-op.
+ if (isUpdateValid(U)) {
+ if (isLazy())
+ PendUpdates.push_back(U);
+ else
+ DeduplicatedUpdates.push_back(U);
+ }
+ }
+ }
+
+ if (Strategy == UpdateStrategy::Lazy)
+ return;
+
+ if (DT)
+ DT->applyUpdates(DeduplicatedUpdates);
+ if (PDT)
+ PDT->applyUpdates(DeduplicatedUpdates);
+}
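
Concretely, the first-update-wins rule means callers may hand
applyUpdatesPermissive a noisy list and let DTU reconcile it against the CFG.
A hypothetical call for the A -> B scenario in the comment above (A, B, and
DTU assumed to exist):

    // The transform deleted A->B and later re-created it. Seen keeps only
    // {Delete, A, B}; isUpdateValid then finds the edge still present in the
    // CFG, so the net no-op is dropped entirely.
    DTU.applyUpdatesPermissive({{DominatorTree::Delete, A, B},
                                {DominatorTree::Insert, A, B}});
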
+
+DominatorTree &DomTreeUpdater::getDomTree() {
+ assert(DT && "Invalid acquisition of a null DomTree");
+ applyDomTreeUpdates();
+ dropOutOfDateUpdates();
+ return *DT;
+}
+
+PostDominatorTree &DomTreeUpdater::getPostDomTree() {
+ assert(PDT && "Invalid acquisition of a null PostDomTree");
+ applyPostDomTreeUpdates();
+ dropOutOfDateUpdates();
+ return *PDT;
+}
+
+void DomTreeUpdater::insertEdge(BasicBlock *From, BasicBlock *To) {
+
+#ifndef NDEBUG
+ assert(isUpdateValid({DominatorTree::Insert, From, To}) &&
+ "Inserted edge does not appear in the CFG");
+#endif
+
+ if (!DT && !PDT)
+ return;
+
+ // Won't affect DomTree and PostDomTree; discard update.
+ if (From == To)
+ return;
+
+ if (Strategy == UpdateStrategy::Eager) {
+ if (DT)
+ DT->insertEdge(From, To);
+ if (PDT)
+ PDT->insertEdge(From, To);
+ return;
+ }
+
+ PendUpdates.push_back({DominatorTree::Insert, From, To});
+}
+
+void DomTreeUpdater::insertEdgeRelaxed(BasicBlock *From, BasicBlock *To) {
+ if (From == To)
+ return;
+
+ if (!DT && !PDT)
+ return;
+
+ if (!isUpdateValid({DominatorTree::Insert, From, To}))
+ return;
+
+ if (Strategy == UpdateStrategy::Eager) {
+ if (DT)
+ DT->insertEdge(From, To);
+ if (PDT)
+ PDT->insertEdge(From, To);
+ return;
+ }
+
+ PendUpdates.push_back({DominatorTree::Insert, From, To});
+}
+
+void DomTreeUpdater::deleteEdge(BasicBlock *From, BasicBlock *To) {
+
+#ifndef NDEBUG
+ assert(isUpdateValid({DominatorTree::Delete, From, To}) &&
+ "Deleted edge still exists in the CFG!");
+#endif
+
+ if (!DT && !PDT)
+ return;
+
+ // Won't affect DomTree and PostDomTree; discard update.
+ if (From == To)
+ return;
+
+ if (Strategy == UpdateStrategy::Eager) {
+ if (DT)
+ DT->deleteEdge(From, To);
+ if (PDT)
+ PDT->deleteEdge(From, To);
+ return;
+ }
+
+ PendUpdates.push_back({DominatorTree::Delete, From, To});
+}
+
+void DomTreeUpdater::deleteEdgeRelaxed(BasicBlock *From, BasicBlock *To) {
+ if (From == To)
+ return;
+
+ if (!DT && !PDT)
+ return;
+
+ if (!isUpdateValid({DominatorTree::Delete, From, To}))
+ return;
+
+ if (Strategy == UpdateStrategy::Eager) {
+ if (DT)
+ DT->deleteEdge(From, To);
+ if (PDT)
+ PDT->deleteEdge(From, To);
+ return;
+ }
+
+ PendUpdates.push_back({DominatorTree::Delete, From, To});
+}
+
+void DomTreeUpdater::dropOutOfDateUpdates() {
+ if (Strategy == DomTreeUpdater::UpdateStrategy::Eager)
+ return;
+
+ tryFlushDeletedBB();
+
+ // Drop all updates applied by both trees.
+ if (!DT)
+ PendDTUpdateIndex = PendUpdates.size();
+ if (!PDT)
+ PendPDTUpdateIndex = PendUpdates.size();
+
+ const size_t dropIndex = std::min(PendDTUpdateIndex, PendPDTUpdateIndex);
+ const auto B = PendUpdates.begin();
+ const auto E = PendUpdates.begin() + dropIndex;
+ assert(B <= E && "Iterator out of range.");
+ PendUpdates.erase(B, E);
+ // Calculate current index.
+ PendDTUpdateIndex -= dropIndex;
+ PendPDTUpdateIndex -= dropIndex;
+}
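
The bookkeeping above keeps one shared PendUpdates vector with two cursors;
only the prefix both trees have already consumed may be erased. A toy trace
of the index arithmetic, with plain ints standing in for updates:

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <vector>

    int main() {
      // Five pending updates; DomTree has applied 3, PostDomTree all 5.
      std::vector<int> PendUpdates = {0, 1, 2, 3, 4};
      std::size_t PendDTUpdateIndex = 3, PendPDTUpdateIndex = 5;

      const std::size_t dropIndex =
          std::min(PendDTUpdateIndex, PendPDTUpdateIndex);
      PendUpdates.erase(PendUpdates.begin(), PendUpdates.begin() + dropIndex);
      PendDTUpdateIndex -= dropIndex;
      PendPDTUpdateIndex -= dropIndex;

      assert(PendUpdates.size() == 2); // updates 3 and 4 remain
      assert(PendDTUpdateIndex == 0 && PendPDTUpdateIndex == 2);
      return 0;
    }
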
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void DomTreeUpdater::dump() const {
+ raw_ostream &OS = llvm::dbgs();
+
+ OS << "Available Trees: ";
+ if (DT || PDT) {
+ if (DT)
+ OS << "DomTree ";
+ if (PDT)
+ OS << "PostDomTree ";
+ OS << "\n";
+ } else
+ OS << "None\n";
+
+ OS << "UpdateStrategy: ";
+ if (Strategy == UpdateStrategy::Eager) {
+ OS << "Eager\n";
+ return;
+ } else
+ OS << "Lazy\n";
+ int Index = 0;
+
+ auto printUpdates =
+ [&](ArrayRef<DominatorTree::UpdateType>::const_iterator begin,
+ ArrayRef<DominatorTree::UpdateType>::const_iterator end) {
+ if (begin == end)
+ OS << " None\n";
+ Index = 0;
+ for (auto It = begin, ItEnd = end; It != ItEnd; ++It) {
+ auto U = *It;
+ OS << " " << Index << " : ";
+ ++Index;
+ if (U.getKind() == DominatorTree::Insert)
+ OS << "Insert, ";
+ else
+ OS << "Delete, ";
+ BasicBlock *From = U.getFrom();
+ if (From) {
+ auto S = From->getName();
+ if (!From->hasName())
+ S = "(no_name)";
+ OS << S << "(" << From << "), ";
+ } else {
+ OS << "(badref), ";
+ }
+ BasicBlock *To = U.getTo();
+ if (To) {
+ auto S = To->getName();
+ if (!To->hasName())
+ S = "(no_name)";
+ OS << S << "(" << To << ")\n";
+ } else {
+ OS << "(badref)\n";
+ }
+ }
+ };
+
+ if (DT) {
+ const auto I = PendUpdates.begin() + PendDTUpdateIndex;
+ assert(PendUpdates.begin() <= I && I <= PendUpdates.end() &&
+ "Iterator out of range.");
+ OS << "Applied but not cleared DomTreeUpdates:\n";
+ printUpdates(PendUpdates.begin(), I);
+ OS << "Pending DomTreeUpdates:\n";
+ printUpdates(I, PendUpdates.end());
+ }
+
+ if (PDT) {
+ const auto I = PendUpdates.begin() + PendPDTUpdateIndex;
+ assert(PendUpdates.begin() <= I && I <= PendUpdates.end() &&
+ "Iterator out of range.");
+ OS << "Applied but not cleared PostDomTreeUpdates:\n";
+ printUpdates(PendUpdates.begin(), I);
+ OS << "Pending PostDomTreeUpdates:\n";
+ printUpdates(I, PendUpdates.end());
+ }
+
+ OS << "Pending DeletedBBs:\n";
+ Index = 0;
+ for (auto BB : DeletedBBs) {
+ OS << " " << Index << " : ";
+ ++Index;
+ if (BB->hasName())
+ OS << BB->getName() << "(";
+ else
+ OS << "(no_name)(";
+ OS << BB << ")\n";
+ }
+
+ OS << "Pending Callbacks:\n";
+ Index = 0;
+ for (auto BB : Callbacks) {
+ OS << " " << Index << " : ";
+ ++Index;
+ if (BB->hasName())
+ OS << BB->getName() << "(";
+ else
+ OS << "(no_name)(";
+ OS << BB << ")\n";
+ }
+}
+#endif
+} // namespace llvm
diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp
index de7f62cf4ecd..f9a554acb7ea 100644
--- a/lib/Analysis/DominanceFrontier.cpp
+++ b/lib/Analysis/DominanceFrontier.cpp
@@ -1,9 +1,8 @@
//===- DominanceFrontier.cpp - Dominance Frontier Calculation -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/EHPersonalities.cpp b/lib/Analysis/EHPersonalities.cpp
index 0df73aeebbdc..2242541696a4 100644
--- a/lib/Analysis/EHPersonalities.cpp
+++ b/lib/Analysis/EHPersonalities.cpp
@@ -1,9 +1,8 @@
//===- EHPersonalities.cpp - Compute EH-related information ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp
index b28abcadca4a..0d6c0ffb18a8 100644
--- a/lib/Analysis/GlobalsModRef.cpp
+++ b/lib/Analysis/GlobalsModRef.cpp
@@ -1,9 +1,8 @@
//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -514,7 +513,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
break;
}
- if (F->isDeclaration() || F->hasFnAttribute(Attribute::OptimizeNone)) {
+ if (F->isDeclaration() || F->hasOptNone()) {
// Try to get mod/ref behaviour from function attributes.
if (F->doesNotAccessMemory()) {
// Can't do better than that!
@@ -567,7 +566,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
// Don't prove any properties based on the implementation of an optnone
// function. Function attributes were already used as a best approximation
// above.
- if (Node->getFunction()->hasFnAttribute(Attribute::OptimizeNone))
+ if (Node->getFunction()->hasOptNone())
continue;
for (Instruction &I : instructions(Node->getFunction())) {
@@ -597,7 +596,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
}
// All non-call instructions we use the primary predicates for whether
- // thay read or write memory.
+ // they read or write memory.
if (I.mayReadFromMemory())
FI.addModRefInfo(ModRefInfo::Ref);
if (I.mayWriteToMemory())
@@ -791,10 +790,10 @@ bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV,
}
// FIXME: It would be good to handle other obvious no-alias cases here, but
- // it isn't clear how to do so reasonbly without building a small version
+ // it isn't clear how to do so reasonably without building a small version
// of BasicAA into this code. We could recurse into AAResultBase::alias
// here but that seems likely to go poorly as we're inside the
- // implementation of such a query. Until then, just conservatievly retun
+ // implementation of such a query. Until then, just conservatively return
// false.
return false;
} while (!Inputs.empty());
@@ -807,7 +806,8 @@ bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV,
/// other is some random pointer, we know there cannot be an alias, because the
/// address of the global isn't taken.
AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+ const MemoryLocation &LocB,
+ AAQueryInfo &AAQI) {
// Get the base object these pointers point to.
const Value *UV1 = GetUnderlyingObject(LocA.Ptr, DL);
const Value *UV2 = GetUnderlyingObject(LocB.Ptr, DL);
@@ -882,11 +882,12 @@ AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA,
if ((GV1 || GV2) && GV1 != GV2)
return NoAlias;
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
}
ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call,
- const GlobalValue *GV) {
+ const GlobalValue *GV,
+ AAQueryInfo &AAQI) {
if (Call->doesNotAccessMemory())
return ModRefInfo::NoModRef;
ModRefInfo ConservativeResult =
@@ -895,14 +896,15 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call,
// Iterate through all the arguments to the called function. If any argument
// is based on GV, return the conservative result.
for (auto &A : Call->args()) {
- SmallVector<Value*, 4> Objects;
+ SmallVector<const Value*, 4> Objects;
GetUnderlyingObjects(A, Objects, DL);
// All objects must be identified.
if (!all_of(Objects, isIdentifiedObject) &&
// Try ::alias to see if all objects are known not to alias GV.
- !all_of(Objects, [&](Value *V) {
- return this->alias(MemoryLocation(V), MemoryLocation(GV)) == NoAlias;
+ !all_of(Objects, [&](const Value *V) {
+ return this->alias(MemoryLocation(V), MemoryLocation(GV), AAQI) ==
+ NoAlias;
}))
return ConservativeResult;
@@ -915,7 +917,8 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call,
}
ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call,
- const MemoryLocation &Loc) {
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
ModRefInfo Known = ModRefInfo::ModRef;
// If we are asking for mod/ref info of a direct call with a pointer to a
@@ -927,11 +930,11 @@ ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call,
if (NonAddressTakenGlobals.count(GV))
if (const FunctionInfo *FI = getFunctionInfo(F))
Known = unionModRef(FI->getModRefInfoForGlobal(*GV),
- getModRefInfoForArgument(Call, GV));
+ getModRefInfoForArgument(Call, GV, AAQI));
if (!isModOrRefSet(Known))
return ModRefInfo::NoModRef; // No need to query other mod/ref analyses
- return intersectModRef(Known, AAResultBase::getModRefInfo(Call, Loc));
+ return intersectModRef(Known, AAResultBase::getModRefInfo(Call, Loc, AAQI));
}
GlobalsAAResult::GlobalsAAResult(const DataLayout &DL,
diff --git a/lib/Analysis/GuardUtils.cpp b/lib/Analysis/GuardUtils.cpp
index 08fa6abeafb5..cad92f6e56bb 100644
--- a/lib/Analysis/GuardUtils.cpp
+++ b/lib/Analysis/GuardUtils.cpp
@@ -1,9 +1,8 @@
//===-- GuardUtils.cpp - Utils for work with guards -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Utils that are used to perform analyses related to guards and their
@@ -19,3 +18,32 @@ bool llvm::isGuard(const User *U) {
using namespace llvm::PatternMatch;
return match(U, m_Intrinsic<Intrinsic::experimental_guard>());
}
+
+bool llvm::isGuardAsWidenableBranch(const User *U) {
+ Value *Condition, *WidenableCondition;
+ BasicBlock *GuardedBB, *DeoptBB;
+ if (!parseWidenableBranch(U, Condition, WidenableCondition, GuardedBB,
+ DeoptBB))
+ return false;
+ using namespace llvm::PatternMatch;
+ for (auto &Insn : *DeoptBB) {
+ if (match(&Insn, m_Intrinsic<Intrinsic::experimental_deoptimize>()))
+ return true;
+ if (Insn.mayHaveSideEffects())
+ return false;
+ }
+ return false;
+}
+
+bool llvm::parseWidenableBranch(const User *U, Value *&Condition,
+ Value *&WidenableCondition,
+ BasicBlock *&IfTrueBB, BasicBlock *&IfFalseBB) {
+ using namespace llvm::PatternMatch;
+ if (!match(U, m_Br(m_And(m_Value(Condition), m_Value(WidenableCondition)),
+ IfTrueBB, IfFalseBB)))
+ return false;
+  // TODO: At the moment, we only recognize the branch if the WC call is in
+  // this specific position. We should generalize!
+ return match(WidenableCondition,
+ m_Intrinsic<Intrinsic::experimental_widenable_condition>());
+}
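A minimal sketch (hypothetical caller, not part of this patch) of how the two new helpers compose: parseWidenableBranch recovers the components of the branch shape, and isGuardAsWidenableBranch additionally requires the false edge to reach a block that deoptimizes without other side effects.

    // BI is assumed to be a BranchInst produced by guard widening.
    Value *Condition, *WidenableCondition;
    BasicBlock *GuardedBB, *DeoptBB;
    if (llvm::parseWidenableBranch(BI, Condition, WidenableCondition,
                                   GuardedBB, DeoptBB)) {
      // BI matches:
      //   %wc = call i1 @llvm.experimental.widenable.condition()
      //   br i1 (and i1 %cond, i1 %wc), label %guarded, label %deopt
    }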
diff --git a/lib/Analysis/IVDescriptors.cpp b/lib/Analysis/IVDescriptors.cpp
index aaebc4a481ec..ce285f82f720 100644
--- a/lib/Analysis/IVDescriptors.cpp
+++ b/lib/Analysis/IVDescriptors.cpp
@@ -1,9 +1,8 @@
//===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,6 +14,7 @@
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -26,7 +26,6 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -252,6 +251,10 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
Worklist.push_back(Start);
VisitedInsts.insert(Start);
+ // Start with all flags set because we will intersect this with the reduction
+ // flags from all the reduction operations.
+ FastMathFlags FMF = FastMathFlags::getFast();
+
// A value in the reduction can be used:
// - By the reduction:
// - Reduction operation:
@@ -297,6 +300,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
if (!ReduxDesc.isRecurrence())
return false;
+ if (isa<FPMathOperator>(ReduxDesc.getPatternInst()))
+ FMF &= ReduxDesc.getPatternInst()->getFastMathFlags();
}
bool IsASelect = isa<SelectInst>(Cur);
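The intersection semantics above, shown in isolation (a hedged sketch; ReductionOps stands in for whatever container holds the reduction operations):

    FastMathFlags FMF = FastMathFlags::getFast(); // start with all flags set
    for (Instruction *I : ReductionOps)
      if (isa<FPMathOperator>(I))
        FMF &= I->getFastMathFlags(); // keep only flags common to every op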
@@ -442,7 +447,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
// Save the description of this reduction variable.
RecurrenceDescriptor RD(
- RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
+ RdxStart, ExitInstruction, Kind, FMF, ReduxDesc.getMinMaxKind(),
ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
RedDes = RD;
@@ -550,9 +555,8 @@ RecurrenceDescriptor::isConditionalRdxPattern(
RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
InstDesc &Prev, bool HasFunNoNaNAttr) {
- bool FP = I->getType()->isFloatingPointTy();
Instruction *UAI = Prev.getUnsafeAlgebraInst();
- if (!UAI && FP && !I->isFast())
+ if (!UAI && isa<FPMathOperator>(I) && !I->hasAllowReassoc())
UAI = I; // Found an unsafe (unvectorizable) algebra instruction.
switch (I->getOpcode()) {
@@ -1010,7 +1014,7 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
// If we started from an UnknownSCEV, and managed to build an addRecurrence
// only after enabling Assume with PSCEV, this means we may have encountered
// cast instructions that required adding a runtime check in order to
- // guarantee the correctness of the AddRecurence respresentation of the
+  // guarantee the correctness of the AddRecurrence representation of the
// induction.
if (PhiScev != AR && SymbolicPhi) {
SmallVector<Instruction *, 2> Casts;
@@ -1049,6 +1053,13 @@ bool InductionDescriptor::isInductionPHI(
Value *StartValue =
Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
+
+ BasicBlock *Latch = AR->getLoop()->getLoopLatch();
+ if (!Latch)
+ return false;
+ BinaryOperator *BOp =
+ dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));
+
const SCEV *Step = AR->getStepRecurrence(*SE);
// Calculate the pointer stride and check if it is consecutive.
// The stride may be a constant or a loop invariant integer value.
@@ -1057,7 +1068,7 @@ bool InductionDescriptor::isInductionPHI(
return false;
if (PhiTy->isIntegerTy()) {
- D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/nullptr,
+ D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,
CastsToIgnore);
return true;
}
@@ -1084,6 +1095,6 @@ bool InductionDescriptor::isInductionPHI(
return false;
auto *StepValue =
SE->getConstant(CV->getType(), CVSize / Size, true /* signed */);
- D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
+ D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue, BOp);
return true;
}
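As a worked example of the step computation above: for a pointer induction over 8-byte elements where the SCEV step constant is 16 bytes per iteration, StepValue becomes 16 / 8 = 2 elements (the function bails out earlier on strides that are not a multiple of the element size).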
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 609e5e3a1448..681a0cf7e981 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -1,9 +1,8 @@
//===- IVUsers.cpp - Induction Variable Users -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index d6e6e76af03c..6ff840efcb64 100644
--- a/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -1,9 +1,8 @@
//===-- IndirectCallPromotionAnalysis.cpp - Find promotion candidates ===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 6ddb3cbc01a3..0dec146e0465 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -1,9 +1,8 @@
//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,7 +27,6 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -37,6 +35,7 @@
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -51,19 +50,19 @@ static cl::opt<int> InlineThreshold(
cl::desc("Control the amount of inlining to perform (default = 225)"));
static cl::opt<int> HintThreshold(
- "inlinehint-threshold", cl::Hidden, cl::init(325),
+ "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
cl::desc("Threshold for inlining functions with inline hint"));
static cl::opt<int>
ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
- cl::init(45),
+ cl::init(45), cl::ZeroOrMore,
cl::desc("Threshold for inlining cold callsites"));
// We introduce this threshold to help performance of instrumentation based
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
static cl::opt<int> ColdThreshold(
- "inlinecold-threshold", cl::Hidden, cl::init(45),
+ "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
cl::desc("Threshold for inlining functions with cold attribute"));
static cl::opt<int>
@@ -77,7 +76,7 @@ static cl::opt<int> LocallyHotCallSiteThreshold(
static cl::opt<int> ColdCallSiteRelFreq(
"cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
- cl::desc("Maxmimum block frequency, expressed as a percentage of caller's "
+ cl::desc("Maximum block frequency, expressed as a percentage of caller's "
"entry frequency, for a callsite to be cold in the absence of "
"profile information."));
@@ -88,7 +87,7 @@ static cl::opt<int> HotCallSiteRelFreq(
"profile information."));
static cl::opt<bool> OptComputeFullInlineCost(
- "inline-cost-full", cl::Hidden, cl::init(false),
+ "inline-cost-full", cl::Hidden, cl::init(false), cl::ZeroOrMore,
cl::desc("Compute the full inline cost of a call site even when the cost "
"exceeds the threshold."));
@@ -122,31 +121,43 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// The candidate callsite being analyzed. Please do not use this to do
/// analysis in the caller function; we want the inline cost query to be
/// easily cacheable. Instead, use the cover function paramHasAttr.
- CallSite CandidateCS;
+ CallBase &CandidateCall;
/// Tunable parameters that control the analysis.
const InlineParams &Params;
+ /// Upper bound for the inlining cost. Bonuses are being applied to account
+ /// for speculative "expected profit" of the inlining decision.
int Threshold;
- int Cost;
+
+  /// Inlining cost measured in abstract units; it accounts for all the
+ /// instructions expected to be executed for a given function invocation.
+ /// Instructions that are statically proven to be dead based on call-site
+ /// arguments are not counted here.
+ int Cost = 0;
+
bool ComputeFullInlineCost;
- bool IsCallerRecursive;
- bool IsRecursiveCall;
- bool ExposesReturnsTwice;
- bool HasDynamicAlloca;
- bool ContainsNoDuplicateCall;
- bool HasReturn;
- bool HasIndirectBr;
- bool HasUninlineableIntrinsic;
- bool InitsVargArgs;
+ bool IsCallerRecursive = false;
+ bool IsRecursiveCall = false;
+ bool ExposesReturnsTwice = false;
+ bool HasDynamicAlloca = false;
+ bool ContainsNoDuplicateCall = false;
+ bool HasReturn = false;
+ bool HasIndirectBr = false;
+ bool HasUninlineableIntrinsic = false;
+ bool InitsVargArgs = false;
/// Number of bytes allocated statically by the callee.
- uint64_t AllocatedSize;
- unsigned NumInstructions, NumVectorInstructions;
- int VectorBonus, TenPercentVectorBonus;
- // Bonus to be applied when the callee has only one reachable basic block.
- int SingleBBBonus;
+ uint64_t AllocatedSize = 0;
+ unsigned NumInstructions = 0;
+ unsigned NumVectorInstructions = 0;
+
+ /// Bonus to be applied when percentage of vector instructions in callee is
+ /// high (see more details in updateThreshold).
+ int VectorBonus = 0;
+ /// Bonus to be applied when the callee has only one reachable basic block.
+ int SingleBBBonus = 0;
/// While we walk the potentially-inlined instructions, we build up and
/// maintain a mapping of simplified values specific to this callsite. The
@@ -181,7 +192,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// loads.
bool EnableLoadElimination;
SmallPtrSet<Value *, 16> LoadAddrSet;
- int LoadEliminationCost;
+ int LoadEliminationCost = 0;
// Custom simplification helper routines.
bool isAllocaDerivedArg(Value *V);
@@ -196,7 +207,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool isGEPFree(GetElementPtrInst &GEP);
bool canFoldInboundsGEP(GetElementPtrInst &I);
bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
- bool simplifyCallSite(Function *F, CallSite CS);
+ bool simplifyCallSite(Function *F, CallBase &Call);
template <typename Callable>
bool simplifyInstruction(Instruction &I, Callable Evaluate);
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
@@ -216,22 +227,28 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// attributes and callee hotness for PGO builds. The Callee is explicitly
/// passed to support analyzing indirect calls whose target is inferred by
/// analysis.
- void updateThreshold(CallSite CS, Function &Callee);
+ void updateThreshold(CallBase &Call, Function &Callee);
- /// Return true if size growth is allowed when inlining the callee at CS.
- bool allowSizeGrowth(CallSite CS);
+ /// Return true if size growth is allowed when inlining the callee at \p Call.
+ bool allowSizeGrowth(CallBase &Call);
- /// Return true if \p CS is a cold callsite.
- bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);
+ /// Return true if \p Call is a cold callsite.
+ bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);
- /// Return a higher threshold if \p CS is a hot callsite.
- Optional<int> getHotCallSiteThreshold(CallSite CS,
+ /// Return a higher threshold if \p Call is a hot callsite.
+ Optional<int> getHotCallSiteThreshold(CallBase &Call,
BlockFrequencyInfo *CallerBFI);
// Custom analysis routines.
InlineResult analyzeBlock(BasicBlock *BB,
SmallPtrSetImpl<const Value *> &EphValues);
+ /// Handle a capped 'int' increment for Cost.
+ void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) {
+ assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound");
+ Cost = (int)std::min(UpperBound, Cost + Inc);
+ }
+
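The saturating helper replaces raw `Cost += ...` updates, so pathological inputs can no longer overflow the signed counter. Illustrative behavior with assumed values:

    // Cost = INT_MAX - 5;  addCost(100);                   // Cost == INT_MAX
    // Cost = 10;           addCost(20, /*UpperBound=*/25); // Cost == 25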
// Disable several entry points to the visitor so we don't accidentally use
// them by declaring but not defining them here.
void visit(Module *);
@@ -256,11 +273,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitCmpInst(CmpInst &I);
bool visitSub(BinaryOperator &I);
bool visitBinaryOperator(BinaryOperator &I);
+ bool visitFNeg(UnaryOperator &I);
bool visitLoad(LoadInst &I);
bool visitStore(StoreInst &I);
bool visitExtractValue(ExtractValueInst &I);
bool visitInsertValue(InsertValueInst &I);
- bool visitCallSite(CallSite CS);
+ bool visitCallBase(CallBase &Call);
bool visitReturnInst(ReturnInst &RI);
bool visitBranchInst(BranchInst &BI);
bool visitSelectInst(SelectInst &SI);
@@ -276,38 +294,29 @@ public:
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
- Function &Callee, CallSite CSArg, const InlineParams &Params)
+ Function &Callee, CallBase &Call, const InlineParams &Params)
: TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
- CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
- Cost(0), ComputeFullInlineCost(OptComputeFullInlineCost ||
- Params.ComputeFullInlineCost || ORE),
- IsCallerRecursive(false), IsRecursiveCall(false),
- ExposesReturnsTwice(false), HasDynamicAlloca(false),
- ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
- HasUninlineableIntrinsic(false), InitsVargArgs(false), AllocatedSize(0),
- NumInstructions(0), NumVectorInstructions(0), VectorBonus(0),
- SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0),
- NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
- NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
- NumInstructionsSimplified(0), SROACostSavings(0),
- SROACostSavingsLost(0) {}
-
- InlineResult analyzeCall(CallSite CS);
+ CandidateCall(Call), Params(Params), Threshold(Params.DefaultThreshold),
+ ComputeFullInlineCost(OptComputeFullInlineCost ||
+ Params.ComputeFullInlineCost || ORE),
+ EnableLoadElimination(true) {}
+
+ InlineResult analyzeCall(CallBase &Call);
int getThreshold() { return Threshold; }
int getCost() { return Cost; }
// Keep a bunch of stats about the cost savings found so we can print them
// out when debugging.
- unsigned NumConstantArgs;
- unsigned NumConstantOffsetPtrArgs;
- unsigned NumAllocaArgs;
- unsigned NumConstantPtrCmps;
- unsigned NumConstantPtrDiffs;
- unsigned NumInstructionsSimplified;
- unsigned SROACostSavings;
- unsigned SROACostSavingsLost;
+ unsigned NumConstantArgs = 0;
+ unsigned NumConstantOffsetPtrArgs = 0;
+ unsigned NumAllocaArgs = 0;
+ unsigned NumConstantPtrCmps = 0;
+ unsigned NumConstantPtrDiffs = 0;
+ unsigned NumInstructionsSimplified = 0;
+ unsigned SROACostSavings = 0;
+ unsigned SROACostSavingsLost = 0;
void dump();
};
@@ -342,7 +351,7 @@ bool CallAnalyzer::lookupSROAArgAndCost(
void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
// If we're no longer able to perform SROA we need to undo its cost savings
// and prevent subsequent analysis.
- Cost += CostIt->second;
+ addCost(CostIt->second);
SROACostSavings -= CostIt->second;
SROACostSavingsLost += CostIt->second;
SROAArgCosts.erase(CostIt);
@@ -366,7 +375,7 @@ void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
void CallAnalyzer::disableLoadElimination() {
if (EnableLoadElimination) {
- Cost += LoadEliminationCost;
+ addCost(LoadEliminationCost);
LoadEliminationCost = 0;
EnableLoadElimination = false;
}
@@ -701,7 +710,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
}
bool CallAnalyzer::visitCastInst(CastInst &I) {
- // Propagate constants through ptrtoint.
+ // Propagate constants through casts.
if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
}))
@@ -721,7 +730,7 @@ bool CallAnalyzer::visitCastInst(CastInst &I) {
case Instruction::FPToUI:
case Instruction::FPToSI:
if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
- Cost += InlineConstants::CallPenalty;
+ addCost(InlineConstants::CallPenalty);
break;
default:
break;
@@ -737,14 +746,14 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
}))
return true;
- // Disable any SROA on the argument to arbitrary unary operators.
+ // Disable any SROA on the argument to arbitrary unary instructions.
disableSROA(Operand);
return false;
}
bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
- return CandidateCS.paramHasAttr(A->getArgNo(), Attr);
+ return CandidateCall.paramHasAttr(A->getArgNo(), Attr);
}
bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
@@ -769,7 +778,7 @@ bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
return false;
}
-bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
+bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
// If the normal destination of the invoke or the parent block of the call
// site is unreachable-terminated, there is little point in inlining this
// unless there is literally zero cost.
@@ -785,21 +794,21 @@ bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
// For now, we are not handling this corner case here as it is rare in real
// code. In future, we should elaborate this based on BPI and BFI in more
// general threshold adjusting heuristics in updateThreshold().
- Instruction *Instr = CS.getInstruction();
- if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
return false;
- } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator()))
+ } else if (isa<UnreachableInst>(Call.getParent()->getTerminator()))
return false;
return true;
}
-bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
+bool CallAnalyzer::isColdCallSite(CallBase &Call,
+ BlockFrequencyInfo *CallerBFI) {
// If global profile summary is available, then callsite's coldness is
// determined based on that.
if (PSI && PSI->hasProfileSummary())
- return PSI->isColdCallSite(CS, CallerBFI);
+ return PSI->isColdCallSite(CallSite(&Call), CallerBFI);
// Otherwise we need BFI to be available.
if (!CallerBFI)
@@ -810,20 +819,21 @@ bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
// complexity is not worth it unless this scaling shows up high in the
// profiles.
const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
- auto CallSiteBB = CS.getInstruction()->getParent();
+ auto CallSiteBB = Call.getParent();
auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
auto CallerEntryFreq =
- CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock()));
+ CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock()));
return CallSiteFreq < CallerEntryFreq * ColdProb;
}
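A worked example of the relative-frequency test (illustrative numbers): with the default cold-callsite-rel-freq of 2, ColdProb is 2/100. If the caller's entry block has frequency 1000 and the callsite's block has frequency 15, then 15 < 1000 * 2% = 20 and the callsite is classified as cold.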
Optional<int>
-CallAnalyzer::getHotCallSiteThreshold(CallSite CS,
+CallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
BlockFrequencyInfo *CallerBFI) {
// If global profile summary is available, then callsite's hotness is
// determined based on that.
- if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(CS, CallerBFI))
+ if (PSI && PSI->hasProfileSummary() &&
+ PSI->isHotCallSite(CallSite(&Call), CallerBFI))
return Params.HotCallSiteThreshold;
// Otherwise we need BFI to be available and to have a locally hot callsite
@@ -835,7 +845,7 @@ CallAnalyzer::getHotCallSiteThreshold(CallSite CS,
// potentially cache the computation of scaled entry frequency, but the added
// complexity is not worth it unless this scaling shows up high in the
// profiles.
- auto CallSiteBB = CS.getInstruction()->getParent();
+ auto CallSiteBB = Call.getParent();
auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
auto CallerEntryFreq = CallerBFI->getEntryFreq();
if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
@@ -845,14 +855,14 @@ CallAnalyzer::getHotCallSiteThreshold(CallSite CS,
return None;
}
-void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
+void CallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
// If no size growth is allowed for this inlining, set Threshold to 0.
- if (!allowSizeGrowth(CS)) {
+ if (!allowSizeGrowth(Call)) {
Threshold = 0;
return;
}
- Function *Caller = CS.getCaller();
+ Function *Caller = Call.getCaller();
// return min(A, B) if B is valid.
auto MinIfValid = [](int A, Optional<int> B) {
@@ -870,15 +880,6 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// basic block at the given callsite context. This is speculatively applied
// and withdrawn if more than one basic block is seen.
//
- // Vector bonuses: We want to more aggressively inline vector-dense kernels
- // and apply this bonus based on the percentage of vector instructions. A
- // bonus is applied if the vector instructions exceed 50% and half that amount
- // is applied if it exceeds 10%. Note that these bonuses are some what
- // arbitrary and evolved over time by accident as much as because they are
- // principled bonuses.
- // FIXME: It would be nice to base the bonus values on something more
- // scientific.
- //
  // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
// of the last call to a static function as inlining such functions is
// guaranteed to reduce code size.
@@ -886,7 +887,7 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// These bonus percentages may be set to 0 based on properties of the caller
// and the callsite.
int SingleBBBonusPercent = 50;
- int VectorBonusPercent = 150;
+ int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
// Lambda to set all the above bonus and bonus percentages to 0.
@@ -898,7 +899,7 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
// and reduce the threshold if the caller has the necessary attribute.
- if (Caller->optForMinSize()) {
+ if (Caller->hasMinSize()) {
Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
// For minsize, we want to disable the single BB bonus and the vector
// bonuses, but not the last-call-to-static bonus. Inlining the last call to
@@ -906,12 +907,12 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// call/return instructions.
SingleBBBonusPercent = 0;
VectorBonusPercent = 0;
- } else if (Caller->optForSize())
+ } else if (Caller->hasOptSize())
Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
// Adjust the threshold based on inlinehint attribute and profile based
// hotness information if the caller does not have MinSize attribute.
- if (!Caller->optForMinSize()) {
+ if (!Caller->hasMinSize()) {
if (Callee.hasFnAttribute(Attribute::InlineHint))
Threshold = MaxIfValid(Threshold, Params.HintThreshold);
@@ -923,15 +924,15 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// used (which adds hotness metadata to calls) or if caller's
// BlockFrequencyInfo is available.
BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
- auto HotCallSiteThreshold = getHotCallSiteThreshold(CS, CallerBFI);
- if (!Caller->optForSize() && HotCallSiteThreshold) {
+ auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI);
+ if (!Caller->hasOptSize() && HotCallSiteThreshold) {
LLVM_DEBUG(dbgs() << "Hot callsite.\n");
// FIXME: This should update the threshold only if it exceeds the
// current threshold, but AutoFDO + ThinLTO currently relies on this
// behavior to prevent inlining of hot callsites during ThinLTO
// compile phase.
Threshold = HotCallSiteThreshold.getValue();
- } else if (isColdCallSite(CS, CallerBFI)) {
+ } else if (isColdCallSite(Call, CallerBFI)) {
LLVM_DEBUG(dbgs() << "Cold callsite.\n");
// Do not apply bonuses for a cold callsite including the
// LastCallToStatic bonus. While this bonus might result in code size
@@ -968,7 +969,7 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
VectorBonus = Threshold * VectorBonusPercent / 100;
bool OnlyOneCallAndLocalLinkage =
- F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
+ F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction();
// If there is only one call of the function, and it has internal linkage,
// the cost of inlining it drops dramatically. It may seem odd to update
// Cost in updateThreshold, but the bonus depends on the logic in this method.
@@ -1087,10 +1088,34 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
// If the instruction is floating point, and the target says this operation
// is expensive, this may eventually become a library call. Treat the cost
- // as such.
+  // as such, unless it's fneg, which can be implemented with an xor.
+ using namespace llvm::PatternMatch;
if (I.getType()->isFloatingPointTy() &&
- TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
- Cost += InlineConstants::CallPenalty;
+ TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive &&
+ !match(&I, m_FNeg(m_Value())))
+ addCost(InlineConstants::CallPenalty);
+
+ return false;
+}
+
+bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
+ Value *Op = I.getOperand(0);
+ Constant *COp = dyn_cast<Constant>(Op);
+ if (!COp)
+ COp = SimplifiedValues.lookup(Op);
+
+ Value *SimpleV = SimplifyFNegInst(COp ? COp : Op,
+ cast<FPMathOperator>(I).getFastMathFlags(),
+ DL);
+
+ if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
+ SimplifiedValues[&I] = C;
+
+ if (SimpleV)
+ return true;
+
+ // Disable any SROA on arguments to arbitrary, unsimplified fneg.
+ disableSROA(Op);
return false;
}
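For instance (an assumed input, not from the patch): if the fneg operand is, or has been simplified to, the constant 2.0, SimplifyFNegInst folds the whole instruction to the constant -2.0; the result is cached in SimplifiedValues and the instruction is treated as free. Only an unsimplified fneg disables SROA on its operand.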
@@ -1173,62 +1198,61 @@ bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
/// analyzing the arguments and call itself with instsimplify. Returns true if
/// it has simplified the callsite to some other entity (a constant), making it
/// free.
-bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
+bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
// FIXME: Using the instsimplify logic directly for this is inefficient
// because we have to continually rebuild the argument list even when no
// simplifications can be performed. Until that is fixed with remapping
// inside of instsimplify, directly constant fold calls here.
- if (!canConstantFoldCallTo(CS, F))
+ if (!canConstantFoldCallTo(&Call, F))
return false;
// Try to re-map the arguments to constants.
SmallVector<Constant *, 4> ConstantArgs;
- ConstantArgs.reserve(CS.arg_size());
- for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E;
- ++I) {
- Constant *C = dyn_cast<Constant>(*I);
+ ConstantArgs.reserve(Call.arg_size());
+ for (Value *I : Call.args()) {
+ Constant *C = dyn_cast<Constant>(I);
if (!C)
- C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
+ C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I));
if (!C)
return false; // This argument doesn't map to a constant.
ConstantArgs.push_back(C);
}
- if (Constant *C = ConstantFoldCall(CS, F, ConstantArgs)) {
- SimplifiedValues[CS.getInstruction()] = C;
+ if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) {
+ SimplifiedValues[&Call] = C;
return true;
}
return false;
}
-bool CallAnalyzer::visitCallSite(CallSite CS) {
- if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
+bool CallAnalyzer::visitCallBase(CallBase &Call) {
+ if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
!F.hasFnAttribute(Attribute::ReturnsTwice)) {
// This aborts the entire analysis.
ExposesReturnsTwice = true;
return false;
}
- if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate())
+ if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
ContainsNoDuplicateCall = true;
- if (Function *F = CS.getCalledFunction()) {
+ if (Function *F = Call.getCalledFunction()) {
// When we have a concrete function, first try to simplify it directly.
- if (simplifyCallSite(F, CS))
+ if (simplifyCallSite(F, Call))
return true;
// Next check if it is an intrinsic we know about.
// FIXME: Lift this into part of the InstVisitor.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
switch (II->getIntrinsicID()) {
default:
- if (!CS.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
+ if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
disableLoadElimination();
- return Base::visitCallSite(CS);
+ return Base::visitCallBase(Call);
case Intrinsic::load_relative:
// This is normally lowered to 4 LLVM instructions.
- Cost += 3 * InlineConstants::InstrCost;
+ addCost(3 * InlineConstants::InstrCost);
return false;
case Intrinsic::memset:
@@ -1247,7 +1271,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
}
}
- if (F == CS.getInstruction()->getFunction()) {
+ if (F == Call.getFunction()) {
// This flag will fully abort the analysis, so don't bother with anything
// else.
IsRecursiveCall = true;
@@ -1257,34 +1281,34 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
if (TTI.isLoweredToCall(F)) {
// We account for the average 1 instruction per call argument setup
// here.
- Cost += CS.arg_size() * InlineConstants::InstrCost;
+ addCost(Call.arg_size() * InlineConstants::InstrCost);
// Everything other than inline ASM will also have a significant cost
// merely from making the call.
- if (!isa<InlineAsm>(CS.getCalledValue()))
- Cost += InlineConstants::CallPenalty;
+ if (!isa<InlineAsm>(Call.getCalledValue()))
+ addCost(InlineConstants::CallPenalty);
}
- if (!CS.onlyReadsMemory())
+ if (!Call.onlyReadsMemory())
disableLoadElimination();
- return Base::visitCallSite(CS);
+ return Base::visitCallBase(Call);
}
// Otherwise we're in a very special case -- an indirect function call. See
// if we can be particularly clever about this.
- Value *Callee = CS.getCalledValue();
+ Value *Callee = Call.getCalledValue();
// First, pay the price of the argument setup. We account for the average
// 1 instruction per call argument setup here.
- Cost += CS.arg_size() * InlineConstants::InstrCost;
+ addCost(Call.arg_size() * InlineConstants::InstrCost);
// Next, check if this happens to be an indirect function call to a known
// function in this inline context. If not, we've done all we can.
Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
if (!F) {
- if (!CS.onlyReadsMemory())
+ if (!Call.onlyReadsMemory())
disableLoadElimination();
- return Base::visitCallSite(CS);
+ return Base::visitCallBase(Call);
}
// If we have a constant that we are calling as a function, we can peer
@@ -1294,9 +1318,9 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// out. Pretend to inline the function, with a custom threshold.
auto IndirectCallParams = Params;
IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
- CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS,
+ CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call,
IndirectCallParams);
- if (CA.analyzeCall(CS)) {
+ if (CA.analyzeCall(Call)) {
// We were able to inline the indirect call! Subtract the cost from the
// threshold to get the bonus we want to apply, but don't go below zero.
Cost -= std::max(0, CA.getThreshold() - CA.getCost());
@@ -1304,7 +1328,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
if (!F->onlyReadsMemory())
disableLoadElimination();
- return Base::visitCallSite(CS);
+ return Base::visitCallBase(Call);
}
bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
@@ -1438,7 +1462,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
(int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
- Cost = CostLowerBound;
+ addCost((int64_t)SI.getNumCases() * InlineConstants::InstrCost);
return false;
}
@@ -1452,7 +1476,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
4 * InlineConstants::InstrCost;
- Cost = std::min((int64_t)CostUpperBound, JTCost + Cost);
+ addCost(JTCost, (int64_t)CostUpperBound);
return false;
}
@@ -1473,7 +1497,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
// n + n / 2 - 1 = n * 3 / 2 - 1
if (NumCaseCluster <= 3) {
// Suppose a comparison includes one compare and one conditional branch.
- Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
+ addCost(NumCaseCluster * 2 * InlineConstants::InstrCost);
return false;
}
@@ -1481,7 +1505,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
int64_t SwitchCost =
ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
- Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost);
+ addCost(SwitchCost, (int64_t)CostUpperBound);
return false;
}
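A worked example for the case-cluster branch (assuming InlineConstants::InstrCost is 5, its value in this tree): with NumCaseCluster = 10, ExpectedNumberOfCompare = 3 * 10 / 2 - 1 = 14, so SwitchCost = 14 * 2 * 5 = 140 units, now added through addCost and capped at CostUpperBound instead of being assigned directly.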
@@ -1574,7 +1598,7 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
if (Base::visit(&*I))
++NumInstructionsSimplified;
else
- Cost += InlineConstants::InstrCost;
+ addCost(InlineConstants::InstrCost);
using namespace ore;
// If the visit this instruction detected an uninlinable pattern, abort.
@@ -1595,7 +1619,7 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
if (ORE)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
- CandidateCS.getInstruction())
+ &CandidateCall)
<< NV("Callee", &F) << " has uninlinable pattern ("
<< NV("InlineResult", IR.message)
<< ") and cost is not fully computed";
@@ -1612,14 +1636,14 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
if (ORE)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
- CandidateCS.getInstruction())
+ &CandidateCall)
<< NV("Callee", &F) << " is " << NV("InlineResult", IR.message)
<< ". Cost is not fully computed";
});
return IR;
}
- // Check if we've past the maximum possible threshold so we don't spin in
+ // Check if we've passed the maximum possible threshold so we don't spin in
// huge basic blocks that will never inline.
if (Cost >= Threshold && !ComputeFullInlineCost)
return false;
@@ -1676,7 +1700,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
/// blocks to see if all their incoming edges are dead or not.
void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
- // A CFG edge is dead if the predecessor is dead or the predessor has a
+ // A CFG edge is dead if the predecessor is dead or the predecessor has a
// known successor which is not the one under exam.
return (DeadBlocks.count(Pred) ||
(KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
@@ -1712,7 +1736,7 @@ void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
/// factors and heuristics. If this method returns false but the computed cost
/// is below the computed threshold, then inlining was forcibly disabled by
/// some artifact of the routine.
-InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
+InlineResult CallAnalyzer::analyzeCall(CallBase &Call) {
++NumCallsAnalyzed;
// Perform some tweaks to the cost and threshold based on the direct
@@ -1729,7 +1753,7 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
assert(NumVectorInstructions == 0);
// Update the threshold based on callsite properties
- updateThreshold(CS, F);
+ updateThreshold(Call, F);
// While Threshold depends on commandline options that can take negative
// values, we want to enforce the invariant that the computed threshold and
@@ -1745,7 +1769,7 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
// Give out bonuses for the callsite, as the instructions setting them up
// will be gone after inlining.
- Cost -= getCallsiteCost(CS, DL);
+ addCost(-getCallsiteCost(Call, DL));
// If this function uses the coldcc calling convention, prefer not to inline
// it.
@@ -1759,14 +1783,11 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
if (F.empty())
return true;
- Function *Caller = CS.getInstruction()->getFunction();
+ Function *Caller = Call.getFunction();
// Check if the caller function is recursive itself.
for (User *U : Caller->users()) {
- CallSite Site(U);
- if (!Site)
- continue;
- Instruction *I = Site.getInstruction();
- if (I->getFunction() == Caller) {
+ CallBase *Call = dyn_cast<CallBase>(U);
+ if (Call && Call->getFunction() == Caller) {
IsCallerRecursive = true;
break;
}
@@ -1774,10 +1795,10 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
// Populate our simplified values by mapping from function arguments to call
// arguments with known important simplifications.
- CallSite::arg_iterator CAI = CS.arg_begin();
+ auto CAI = Call.arg_begin();
for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
FAI != FAE; ++FAI, ++CAI) {
- assert(CAI != CS.arg_end());
+ assert(CAI != Call.arg_end());
if (Constant *C = dyn_cast<Constant>(CAI))
SimplifiedValues[&*FAI] = C;
@@ -1826,14 +1847,18 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
if (BB->empty())
continue;
- // Disallow inlining a blockaddress. A blockaddress only has defined
- // behavior for an indirect branch in the same function, and we do not
- // currently support inlining indirect branches. But, the inliner may not
- // see an indirect branch that ends up being dead code at a particular call
- // site. If the blockaddress escapes the function, e.g., via a global
- // variable, inlining may lead to an invalid cross-function reference.
+ // Disallow inlining a blockaddress with uses other than strictly callbr.
+ // A blockaddress only has defined behavior for an indirect branch in the
+ // same function, and we do not currently support inlining indirect
+ // branches. But, the inliner may not see an indirect branch that ends up
+ // being dead code at a particular call site. If the blockaddress escapes
+ // the function, e.g., via a global variable, inlining may lead to an
+ // invalid cross-function reference.
+ // FIXME: pr/39560: continue relaxing this overt restriction.
if (BB->hasAddressTaken())
- return "blockaddress";
+ for (User *U : BlockAddress::get(&*BB)->users())
+ if (!isa<CallBrInst>(*U))
+ return "blockaddress used outside of callbr";
// Analyze the cost of this block. If we blow through the threshold, this
    // returns false, and we can bail out.
@@ -1887,7 +1912,7 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
}
bool OnlyOneCallAndLocalLinkage =
- F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
+ F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction();
// If this is a noduplicate call, we can still inline as long as
// inlining this would cause the removal of the caller (so the instruction
// is not actually duplicated, just moved).
@@ -1899,7 +1924,7 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
// size, we penalise any call sites that perform loops. We do this after all
// other costs here, so will likely only be dealing with relatively small
// functions (and hence DT and LI will hopefully be cheap).
- if (Caller->optForMinSize()) {
+ if (Caller->hasMinSize()) {
DominatorTree DT(F);
LoopInfo LI(DT);
int NumLoops = 0;
@@ -1909,7 +1934,7 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
continue;
NumLoops++;
}
- Cost += NumLoops * InlineConstants::CallPenalty;
+ addCost(NumLoops * InlineConstants::CallPenalty);
}
// We applied the maximum possible vector bonus at the beginning. Now,
@@ -1953,13 +1978,13 @@ static bool functionsHaveCompatibleAttributes(Function *Caller,
AttributeFuncs::areInlineCompatible(*Caller, *Callee);
}
-int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
+int llvm::getCallsiteCost(CallBase &Call, const DataLayout &DL) {
int Cost = 0;
- for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
- if (CS.isByValArgument(I)) {
+ for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
+ if (Call.isByValArgument(I)) {
// We approximate the number of loads and stores needed by dividing the
// size of the byval type by the target's pointer size.
- PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
+ PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
unsigned AS = PTy->getAddressSpace();
unsigned PointerSize = DL.getPointerSizeInBits(AS);
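A rough worked example of the byval approximation (illustrative; the per-argument cap applied a few lines below this hunk is unchanged): a 32-byte byval struct with 64-bit pointers gives TypeSize = 256 and PointerSize = 64, i.e. roughly four pointer-sized load/store pairs charged to the callsite.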
@@ -1987,16 +2012,16 @@ int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
}
InlineCost llvm::getInlineCost(
- CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
+ CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
- return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
+ return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
GetAssumptionCache, GetBFI, PSI, ORE);
}
InlineCost llvm::getInlineCost(
- CallSite CS, Function *Callee, const InlineParams &Params,
+ CallBase &Call, Function *Callee, const InlineParams &Params,
TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
@@ -2012,9 +2037,9 @@ InlineCost llvm::getInlineCost(
// argument is in the alloca address space (so it is a little bit complicated
// to solve).
unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
- for (unsigned I = 0, E = CS.arg_size(); I != E; ++I)
- if (CS.isByValArgument(I)) {
- PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
+ for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
+ if (Call.isByValArgument(I)) {
+ PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
if (PTy->getAddressSpace() != AllocaAS)
return llvm::InlineCost::getNever("byval arguments without alloca"
" address space");
@@ -2022,20 +2047,21 @@ InlineCost llvm::getInlineCost(
// Calls to functions with always-inline attributes should be inlined
// whenever possible.
- if (CS.hasFnAttr(Attribute::AlwaysInline)) {
- if (isInlineViable(*Callee))
+ if (Call.hasFnAttr(Attribute::AlwaysInline)) {
+ auto IsViable = isInlineViable(*Callee);
+ if (IsViable)
return llvm::InlineCost::getAlways("always inline attribute");
- return llvm::InlineCost::getNever("inapplicable always inline attribute");
+ return llvm::InlineCost::getNever(IsViable.message);
}
// Never inline functions with conflicting attributes (unless callee has
// always-inline attribute).
- Function *Caller = CS.getCaller();
+ Function *Caller = Call.getCaller();
if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
return llvm::InlineCost::getNever("conflicting attributes");
// Don't inline this call if the caller has the optnone attribute.
- if (Caller->hasFnAttribute(Attribute::OptimizeNone))
+ if (Caller->hasOptNone())
return llvm::InlineCost::getNever("optnone attribute");
// Don't inline a function that treats null pointer as valid into a caller
@@ -2052,15 +2078,15 @@ InlineCost llvm::getInlineCost(
return llvm::InlineCost::getNever("noinline function attribute");
// Don't inline call sites marked noinline.
- if (CS.isNoInline())
+ if (Call.isNoInline())
return llvm::InlineCost::getNever("noinline call site attribute");
LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "... (caller:" << Caller->getName() << ")\n");
- CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS,
- Params);
- InlineResult ShouldInline = CA.analyzeCall(CS);
+ CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee,
+ Call, Params);
+ InlineResult ShouldInline = CA.analyzeCall(Call);
LLVM_DEBUG(CA.dump());
@@ -2073,42 +2099,50 @@ InlineCost llvm::getInlineCost(
return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
}
-bool llvm::isInlineViable(Function &F) {
+InlineResult llvm::isInlineViable(Function &F) {
bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
- // Disallow inlining of functions which contain indirect branches or
- // blockaddresses.
- if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
- return false;
+ // Disallow inlining of functions which contain indirect branches.
+ if (isa<IndirectBrInst>(BI->getTerminator()))
+ return "contains indirect branches";
+
+ // Disallow inlining of blockaddresses which are used by non-callbr
+ // instructions.
+ if (BI->hasAddressTaken())
+ for (User *U : BlockAddress::get(&*BI)->users())
+ if (!isa<CallBrInst>(*U))
+ return "blockaddress used outside of callbr";
for (auto &II : *BI) {
- CallSite CS(&II);
- if (!CS)
+ CallBase *Call = dyn_cast<CallBase>(&II);
+ if (!Call)
continue;
// Disallow recursive calls.
- if (&F == CS.getCalledFunction())
- return false;
+ if (&F == Call->getCalledFunction())
+ return "recursive call";
// Disallow calls which expose returns-twice to a function not previously
// attributed as such.
- if (!ReturnsTwice && CS.isCall() &&
- cast<CallInst>(CS.getInstruction())->canReturnTwice())
- return false;
+ if (!ReturnsTwice && isa<CallInst>(Call) &&
+ cast<CallInst>(Call)->canReturnTwice())
+ return "exposes returns-twice attribute";
- if (CS.getCalledFunction())
- switch (CS.getCalledFunction()->getIntrinsicID()) {
+ if (Call->getCalledFunction())
+ switch (Call->getCalledFunction()->getIntrinsicID()) {
default:
break;
      // Disallow inlining of @llvm.icall.branch.funnel because the current
      // backend can't separate call targets from call arguments.
case llvm::Intrinsic::icall_branch_funnel:
+ return "disallowed inlining of @llvm.icall.branch.funnel";
// Disallow inlining functions that call @llvm.localescape. Doing this
// correctly would require major changes to the inliner.
case llvm::Intrinsic::localescape:
+ return "disallowed inlining of @llvm.localescape";
// Disallow inlining of functions that initialize VarArgs with va_start.
case llvm::Intrinsic::vastart:
- return false;
+ return "contains VarArgs initialized with va_start";
}
}
}
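A hedged sketch of consuming the richer return type (this mirrors what the getInlineCost change above does via IsViable.message):

    InlineResult Viable = isInlineViable(F);
    if (!Viable) // InlineResult converts to bool
      LLVM_DEBUG(dbgs() << F.getName() << " is not inline-viable: "
                        << Viable.message << "\n");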
diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp
index 95ab6ee3db5b..943a99a5f46d 100644
--- a/lib/Analysis/InstCount.cpp
+++ b/lib/Analysis/InstCount.cpp
@@ -1,9 +1,8 @@
//===-- InstCount.cpp - Collects the count of all instructions ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/InstructionPrecedenceTracking.cpp b/lib/Analysis/InstructionPrecedenceTracking.cpp
index 816126f407ca..35190ce3e11a 100644
--- a/lib/Analysis/InstructionPrecedenceTracking.cpp
+++ b/lib/Analysis/InstructionPrecedenceTracking.cpp
@@ -1,9 +1,8 @@
//===-- InstructionPrecedenceTracking.cpp -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Implements a class that is able to define some instructions as "special"
@@ -20,6 +19,7 @@
#include "llvm/Analysis/InstructionPrecedenceTracking.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/PatternMatch.h"
using namespace llvm;
@@ -153,5 +153,8 @@ bool ImplicitControlFlowTracking::isSpecialInstruction(
bool MemoryWriteTracking::isSpecialInstruction(
const Instruction *Insn) const {
+ using namespace PatternMatch;
+ if (match(Insn, m_Intrinsic<Intrinsic::experimental_widenable_condition>()))
+ return false;
return Insn->mayWriteToMemory();
}
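The carve-out is needed because (per the intrinsic's definition, not this patch) llvm.experimental.widenable.condition is modeled as touching only inaccessible memory so it cannot be freely hoisted or sunk; that makes mayWriteToMemory() return true for it even though it never clobbers a location this tracking cares about.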
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index ccf907c144f0..e34bf6f4e43f 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -1,9 +1,8 @@
//===- InstructionSimplify.cpp - Fold instruction operands ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -34,6 +33,8 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
@@ -50,6 +51,9 @@ STATISTIC(NumExpand, "Number of expansions");
STATISTIC(NumReassoc, "Number of reassociations");
static Value *SimplifyAndInst(Value *, Value *, const SimplifyQuery &, unsigned);
+static Value *simplifyUnOp(unsigned, Value *, const SimplifyQuery &, unsigned);
+static Value *simplifyFPUnOp(unsigned, Value *, const FastMathFlags &,
+ const SimplifyQuery &, unsigned);
static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &,
unsigned);
static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &,
@@ -655,32 +659,11 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType();
APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth());
- // Even though we don't look through PHI nodes, we could be called on an
- // instruction in an unreachable block, which may be on a cycle.
- SmallPtrSet<Value *, 4> Visited;
- Visited.insert(V);
- do {
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- if ((!AllowNonInbounds && !GEP->isInBounds()) ||
- !GEP->accumulateConstantOffset(DL, Offset))
- break;
- V = GEP->getPointerOperand();
- } else if (Operator::getOpcode(V) == Instruction::BitCast) {
- V = cast<Operator>(V)->getOperand(0);
- } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
- if (GA->isInterposable())
- break;
- V = GA->getAliasee();
- } else {
- if (auto CS = CallSite(V))
- if (Value *RV = CS.getReturnedArgOperand()) {
- V = RV;
- continue;
- }
- break;
- }
- assert(V->getType()->isPtrOrPtrVectorTy() && "Unexpected operand type!");
- } while (Visited.insert(V).second);
+ V = V->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds);
+ // As that strip may trace through `addrspacecast`, we may need to sext or
+ // trunc the calculated offset.
+ IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType();
+ Offset = Offset.sextOrTrunc(IntPtrTy->getIntegerBitWidth());
Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset);
if (V->getType()->isVectorTy())
@@ -1841,6 +1824,16 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return Op1;
}
+ // This is the usual pattern for checking whether a value is a power-of-2:
+ // (A - 1) & A --> 0 (if A is a power-of-2 or 0)
+ // A & (A - 1) --> 0 (if A is a power-of-2 or 0)
+ if (match(Op0, m_Add(m_Specific(Op1), m_AllOnes())) &&
+ isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
+ return Constant::getNullValue(Op1->getType());
+ if (match(Op1, m_Add(m_Specific(Op0), m_AllOnes())) &&
+ isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
+ return Constant::getNullValue(Op0->getType());
+
if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, true))
return V;
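The new fold is the classic power-of-two identity applied in reverse: once isKnownToBeAPowerOfTwo (allowing zero) proves the operand, the AND must be zero. A quick standalone check of the identity itself, as a sketch:

#include "llvm/ADT/APInt.h"
using namespace llvm;

// (A - 1) & A == 0 exactly when A is a power of two or zero: subtracting one
// clears the lowest set bit and sets all bits below it, so the two values
// share no set bits. E.g. A = 0b1000 gives A - 1 = 0b0111 and the AND is 0.
static bool andWithDecrementIsZero(const APInt &A) {
  return ((A - 1) & A) == 0;
}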
@@ -2280,12 +2273,12 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
// come from a pointer that cannot overlap with dynamically-allocated
// memory within the lifetime of the current function (allocas, byval
// arguments, globals), then determine the comparison result here.
- SmallVector<Value *, 8> LHSUObjs, RHSUObjs;
+ SmallVector<const Value *, 8> LHSUObjs, RHSUObjs;
GetUnderlyingObjects(LHS, LHSUObjs, DL);
GetUnderlyingObjects(RHS, RHSUObjs, DL);
// Is the set of underlying objects all noalias calls?
- auto IsNAC = [](ArrayRef<Value *> Objects) {
+ auto IsNAC = [](ArrayRef<const Value *> Objects) {
return all_of(Objects, isNoAliasCall);
};
@@ -2295,8 +2288,8 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
// live with the compared-to allocation). For globals, we exclude symbols
// that might be resolved lazily to symbols in another dynamically-loaded
// library (and, thus, could be malloc'ed by the implementation).
- auto IsAllocDisjoint = [](ArrayRef<Value *> Objects) {
- return all_of(Objects, [](Value *V) {
+ auto IsAllocDisjoint = [](ArrayRef<const Value *> Objects) {
+ return all_of(Objects, [](const Value *V) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
return AI->getParent() && AI->getFunction() && AI->isStaticAlloca();
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
@@ -2472,228 +2465,6 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
return nullptr;
}
-/// Many binary operators with a constant operand have an easy-to-compute
-/// range of outputs. This can be used to fold a comparison to always true or
-/// always false.
-static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper,
- const InstrInfoQuery &IIQ) {
- unsigned Width = Lower.getBitWidth();
- const APInt *C;
- switch (BO.getOpcode()) {
- case Instruction::Add:
- if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
- // FIXME: If we have both nuw and nsw, we should reduce the range further.
- if (IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
- // 'add nuw x, C' produces [C, UINT_MAX].
- Lower = *C;
- } else if (IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
- if (C->isNegative()) {
- // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
- Lower = APInt::getSignedMinValue(Width);
- Upper = APInt::getSignedMaxValue(Width) + *C + 1;
- } else {
- // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX].
- Lower = APInt::getSignedMinValue(Width) + *C;
- Upper = APInt::getSignedMaxValue(Width) + 1;
- }
- }
- }
- break;
-
- case Instruction::And:
- if (match(BO.getOperand(1), m_APInt(C)))
- // 'and x, C' produces [0, C].
- Upper = *C + 1;
- break;
-
- case Instruction::Or:
- if (match(BO.getOperand(1), m_APInt(C)))
- // 'or x, C' produces [C, UINT_MAX].
- Lower = *C;
- break;
-
- case Instruction::AShr:
- if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
- // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C].
- Lower = APInt::getSignedMinValue(Width).ashr(*C);
- Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
- } else if (match(BO.getOperand(0), m_APInt(C))) {
- unsigned ShiftAmount = Width - 1;
- if (!C->isNullValue() && IIQ.isExact(&BO))
- ShiftAmount = C->countTrailingZeros();
- if (C->isNegative()) {
- // 'ashr C, x' produces [C, C >> (Width-1)]
- Lower = *C;
- Upper = C->ashr(ShiftAmount) + 1;
- } else {
- // 'ashr C, x' produces [C >> (Width-1), C]
- Lower = C->ashr(ShiftAmount);
- Upper = *C + 1;
- }
- }
- break;
-
- case Instruction::LShr:
- if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
- // 'lshr x, C' produces [0, UINT_MAX >> C].
- Upper = APInt::getAllOnesValue(Width).lshr(*C) + 1;
- } else if (match(BO.getOperand(0), m_APInt(C))) {
- // 'lshr C, x' produces [C >> (Width-1), C].
- unsigned ShiftAmount = Width - 1;
- if (!C->isNullValue() && IIQ.isExact(&BO))
- ShiftAmount = C->countTrailingZeros();
- Lower = C->lshr(ShiftAmount);
- Upper = *C + 1;
- }
- break;
-
- case Instruction::Shl:
- if (match(BO.getOperand(0), m_APInt(C))) {
- if (IIQ.hasNoUnsignedWrap(&BO)) {
- // 'shl nuw C, x' produces [C, C << CLZ(C)]
- Lower = *C;
- Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
- } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw?
- if (C->isNegative()) {
- // 'shl nsw C, x' produces [C << CLO(C)-1, C]
- unsigned ShiftAmount = C->countLeadingOnes() - 1;
- Lower = C->shl(ShiftAmount);
- Upper = *C + 1;
- } else {
- // 'shl nsw C, x' produces [C, C << CLZ(C)-1]
- unsigned ShiftAmount = C->countLeadingZeros() - 1;
- Lower = *C;
- Upper = C->shl(ShiftAmount) + 1;
- }
- }
- }
- break;
-
- case Instruction::SDiv:
- if (match(BO.getOperand(1), m_APInt(C))) {
- APInt IntMin = APInt::getSignedMinValue(Width);
- APInt IntMax = APInt::getSignedMaxValue(Width);
- if (C->isAllOnesValue()) {
- // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
- // where C != -1 and C != 0 and C != 1
- Lower = IntMin + 1;
- Upper = IntMax + 1;
- } else if (C->countLeadingZeros() < Width - 1) {
- // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C]
- // where C != -1 and C != 0 and C != 1
- Lower = IntMin.sdiv(*C);
- Upper = IntMax.sdiv(*C);
- if (Lower.sgt(Upper))
- std::swap(Lower, Upper);
- Upper = Upper + 1;
- assert(Upper != Lower && "Upper part of range has wrapped!");
- }
- } else if (match(BO.getOperand(0), m_APInt(C))) {
- if (C->isMinSignedValue()) {
- // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
- Lower = *C;
- Upper = Lower.lshr(1) + 1;
- } else {
- // 'sdiv C, x' produces [-|C|, |C|].
- Upper = C->abs() + 1;
- Lower = (-Upper) + 1;
- }
- }
- break;
-
- case Instruction::UDiv:
- if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
- // 'udiv x, C' produces [0, UINT_MAX / C].
- Upper = APInt::getMaxValue(Width).udiv(*C) + 1;
- } else if (match(BO.getOperand(0), m_APInt(C))) {
- // 'udiv C, x' produces [0, C].
- Upper = *C + 1;
- }
- break;
-
- case Instruction::SRem:
- if (match(BO.getOperand(1), m_APInt(C))) {
- // 'srem x, C' produces (-|C|, |C|).
- Upper = C->abs();
- Lower = (-Upper) + 1;
- }
- break;
-
- case Instruction::URem:
- if (match(BO.getOperand(1), m_APInt(C)))
- // 'urem x, C' produces [0, C).
- Upper = *C;
- break;
-
- default:
- break;
- }
-}
-
-/// Some intrinsics with a constant operand have an easy-to-compute range of
-/// outputs. This can be used to fold a comparison to always true or always
-/// false.
-static void setLimitsForIntrinsic(IntrinsicInst &II, APInt &Lower,
- APInt &Upper) {
- unsigned Width = Lower.getBitWidth();
- const APInt *C;
- switch (II.getIntrinsicID()) {
- case Intrinsic::uadd_sat:
- // uadd.sat(x, C) produces [C, UINT_MAX].
- if (match(II.getOperand(0), m_APInt(C)) ||
- match(II.getOperand(1), m_APInt(C)))
- Lower = *C;
- break;
- case Intrinsic::sadd_sat:
- if (match(II.getOperand(0), m_APInt(C)) ||
- match(II.getOperand(1), m_APInt(C))) {
- if (C->isNegative()) {
- // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)].
- Lower = APInt::getSignedMinValue(Width);
- Upper = APInt::getSignedMaxValue(Width) + *C + 1;
- } else {
- // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX].
- Lower = APInt::getSignedMinValue(Width) + *C;
- Upper = APInt::getSignedMaxValue(Width) + 1;
- }
- }
- break;
- case Intrinsic::usub_sat:
- // usub.sat(C, x) produces [0, C].
- if (match(II.getOperand(0), m_APInt(C)))
- Upper = *C + 1;
- // usub.sat(x, C) produces [0, UINT_MAX - C].
- else if (match(II.getOperand(1), m_APInt(C)))
- Upper = APInt::getMaxValue(Width) - *C + 1;
- break;
- case Intrinsic::ssub_sat:
- if (match(II.getOperand(0), m_APInt(C))) {
- if (C->isNegative()) {
- // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)].
- Lower = APInt::getSignedMinValue(Width);
- Upper = *C - APInt::getSignedMinValue(Width) + 1;
- } else {
- // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX].
- Lower = *C - APInt::getSignedMaxValue(Width);
- Upper = APInt::getSignedMaxValue(Width) + 1;
- }
- } else if (match(II.getOperand(1), m_APInt(C))) {
- if (C->isNegative()) {
- // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX]:
- Lower = APInt::getSignedMinValue(Width) - *C;
- Upper = APInt::getSignedMaxValue(Width) + 1;
- } else {
- // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C].
- Lower = APInt::getSignedMinValue(Width);
- Upper = APInt::getSignedMaxValue(Width) - *C + 1;
- }
- }
- break;
- default:
- break;
- }
-}
-
static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, const InstrInfoQuery &IIQ) {
Type *ITy = GetCompareTy(RHS); // The return type.
@@ -2721,22 +2492,7 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
if (RHS_CR.isFullSet())
return ConstantInt::getTrue(ITy);
- // Find the range of possible values for binary operators.
- unsigned Width = C->getBitWidth();
- APInt Lower = APInt(Width, 0);
- APInt Upper = APInt(Width, 0);
- if (auto *BO = dyn_cast<BinaryOperator>(LHS))
- setLimitsForBinOp(*BO, Lower, Upper, IIQ);
- else if (auto *II = dyn_cast<IntrinsicInst>(LHS))
- setLimitsForIntrinsic(*II, Lower, Upper);
-
- ConstantRange LHS_CR =
- Lower != Upper ? ConstantRange(Lower, Upper) : ConstantRange(Width, true);
-
- if (auto *I = dyn_cast<Instruction>(LHS))
- if (auto *Ranges = IIQ.getMetadata(I, LLVMContext::MD_range))
- LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges));
-
+ ConstantRange LHS_CR = computeConstantRange(LHS, IIQ.UseInstrInfo);
if (!LHS_CR.isFullSet()) {
if (RHS_CR.contains(LHS_CR))
return ConstantInt::getTrue(ITy);
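computeConstantRange now supplies what the deleted per-opcode limit tables computed by hand: a range for the LHS that is tested against the exact region implied by the predicate and constant. A worked numeric instance of that containment test, with made-up values:

#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// 'urem x, 4' always lands in [0, 4), and the exact region for 'ult 8' is
// [0, 8), which contains it, so 'icmp ult (urem x, 4), 8' folds to true.
static bool uremUltFoldsToTrue() {
  ConstantRange LHS_CR(APInt(32, 0), APInt(32, 4));
  ConstantRange RHS_CR =
      ConstantRange::makeExactICmpRegion(CmpInst::ICMP_ULT, APInt(32, 8));
  return RHS_CR.contains(LHS_CR); // true
}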
@@ -3062,44 +2818,6 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
return nullptr;
}
-static Value *simplifyICmpWithAbsNabs(CmpInst::Predicate Pred, Value *Op0,
- Value *Op1) {
- // We need a comparison with a constant.
- const APInt *C;
- if (!match(Op1, m_APInt(C)))
- return nullptr;
-
- // matchSelectPattern returns the negation part of an abs pattern in SP1.
- // If the negate has an NSW flag, abs(INT_MIN) is undefined. Without that
- // constraint, we can't make a contiguous range for the result of abs.
- ICmpInst::Predicate AbsPred = ICmpInst::BAD_ICMP_PREDICATE;
- Value *SP0, *SP1;
- SelectPatternFlavor SPF = matchSelectPattern(Op0, SP0, SP1).Flavor;
- if (SPF == SelectPatternFlavor::SPF_ABS &&
- cast<Instruction>(SP1)->hasNoSignedWrap())
- // The result of abs(X) is >= 0 (with nsw).
- AbsPred = ICmpInst::ICMP_SGE;
- if (SPF == SelectPatternFlavor::SPF_NABS)
- // The result of -abs(X) is <= 0.
- AbsPred = ICmpInst::ICMP_SLE;
-
- if (AbsPred == ICmpInst::BAD_ICMP_PREDICATE)
- return nullptr;
-
- // If there is no intersection between abs/nabs and the range of this icmp,
- // the icmp must be false. If the abs/nabs range is a subset of the icmp
- // range, the icmp must be true.
- APInt Zero = APInt::getNullValue(C->getBitWidth());
- ConstantRange AbsRange = ConstantRange::makeExactICmpRegion(AbsPred, Zero);
- ConstantRange CmpRange = ConstantRange::makeExactICmpRegion(Pred, *C);
- if (AbsRange.intersectWith(CmpRange).isEmptySet())
- return getFalse(GetCompareTy(Op0));
- if (CmpRange.contains(AbsRange))
- return getTrue(GetCompareTy(Op0));
-
- return nullptr;
-}
-
/// Simplify integer comparisons where at least one operand of the compare
/// matches an integer min/max idiom.
static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS,
@@ -3319,9 +3037,16 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
std::swap(LHS, RHS);
Pred = CmpInst::getSwappedPredicate(Pred);
}
+ assert(!isa<UndefValue>(LHS) && "Unexpected icmp undef,%X");
Type *ITy = GetCompareTy(LHS); // The return type.
+ // For EQ and NE, we can always pick a value for the undef to make the
+ // predicate pass or fail, so we can return undef.
+ // Matches behavior in llvm::ConstantFoldCompareInstruction.
+ if (isa<UndefValue>(RHS) && ICmpInst::isEquality(Pred))
+ return UndefValue::get(ITy);
+
// icmp X, X -> true/false
// icmp X, undef -> true/false because undef could be X.
if (LHS == RHS || isa<UndefValue>(RHS))
@@ -3531,9 +3256,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Value *V = simplifyICmpWithMinMax(Pred, LHS, RHS, Q, MaxRecurse))
return V;
- if (Value *V = simplifyICmpWithAbsNabs(Pred, LHS, RHS))
- return V;
-
// Simplify comparisons of related pointers using a powerful, recursive
// GEP-walk when we have target data available.
if (LHS->getType()->isPointerTy())
@@ -3647,6 +3369,8 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
// Handle fcmp with constant RHS.
+ // TODO: Use match with a specific FP value, so these work with vectors with
+ // undef lanes.
const APFloat *C;
if (match(RHS, m_APFloat(C))) {
// Check whether the constant is an infinity.
@@ -3675,28 +3399,7 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
}
- if (C->isZero()) {
- switch (Pred) {
- case FCmpInst::FCMP_OGE:
- if (FMF.noNaNs() && CannotBeOrderedLessThanZero(LHS, Q.TLI))
- return getTrue(RetTy);
- break;
- case FCmpInst::FCMP_UGE:
- if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
- return getTrue(RetTy);
- break;
- case FCmpInst::FCMP_ULT:
- if (FMF.noNaNs() && CannotBeOrderedLessThanZero(LHS, Q.TLI))
- return getFalse(RetTy);
- break;
- case FCmpInst::FCMP_OLT:
- if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
- return getFalse(RetTy);
- break;
- default:
- break;
- }
- } else if (C->isNegative()) {
+ if (C->isNegative() && !C->isNegZero()) {
assert(!C->isNaN() && "Unexpected NaN constant!");
// TODO: We can catch more cases by using a range check rather than
// relying on CannotBeOrderedLessThanZero.
@@ -3719,6 +3422,67 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
break;
}
}
+
+ // Check comparison of [minnum/maxnum with constant] with other constant.
+ const APFloat *C2;
+ if ((match(LHS, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_APFloat(C2))) &&
+ C2->compare(*C) == APFloat::cmpLessThan) ||
+ (match(LHS, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_APFloat(C2))) &&
+ C2->compare(*C) == APFloat::cmpGreaterThan)) {
+ bool IsMaxNum =
+ cast<IntrinsicInst>(LHS)->getIntrinsicID() == Intrinsic::maxnum;
+ // The ordered relationship and minnum/maxnum guarantee that we do not
+ // have NaN constants, so ordered/unordered preds are handled the same.
+ switch (Pred) {
+ case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_UEQ:
+ // minnum(X, LesserC) == C --> false
+ // maxnum(X, GreaterC) == C --> false
+ return getFalse(RetTy);
+ case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_UNE:
+ // minnum(X, LesserC) != C --> true
+ // maxnum(X, GreaterC) != C --> true
+ return getTrue(RetTy);
+ case FCmpInst::FCMP_OGE: case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_OGT: case FCmpInst::FCMP_UGT:
+ // minnum(X, LesserC) >= C --> false
+ // minnum(X, LesserC) > C --> false
+ // maxnum(X, GreaterC) >= C --> true
+ // maxnum(X, GreaterC) > C --> true
+ return ConstantInt::get(RetTy, IsMaxNum);
+ case FCmpInst::FCMP_OLE: case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_OLT: case FCmpInst::FCMP_ULT:
+ // minnum(X, LesserC) <= C --> true
+ // minnum(X, LesserC) < C --> true
+ // maxnum(X, GreaterC) <= C --> false
+ // maxnum(X, GreaterC) < C --> false
+ return ConstantInt::get(RetTy, !IsMaxNum);
+ default:
+ // TRUE/FALSE/ORD/UNO should be handled before this.
+ llvm_unreachable("Unexpected fcmp predicate");
+ }
+ }
+ }
+
+ if (match(RHS, m_AnyZeroFP())) {
+ switch (Pred) {
+ case FCmpInst::FCMP_OGE:
+ case FCmpInst::FCMP_ULT:
+ // Positive or zero X >= 0.0 --> true
+ // Positive or zero X < 0.0 --> false
+ if ((FMF.noNaNs() || isKnownNeverNaN(LHS, Q.TLI)) &&
+ CannotBeOrderedLessThanZero(LHS, Q.TLI))
+ return Pred == FCmpInst::FCMP_OGE ? getTrue(RetTy) : getFalse(RetTy);
+ break;
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_OLT:
+ // Positive or zero or nan X >= 0.0 --> true
+ // Positive or zero or nan X < 0.0 --> false
+ if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
+ return Pred == FCmpInst::FCMP_UGE ? getTrue(RetTy) : getFalse(RetTy);
+ break;
+ default:
+ break;
+ }
}
// If the comparison is with the result of a select instruction, check whether
@@ -3904,27 +3668,44 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
Pred == ICmpInst::ICMP_EQ))
return V;
- // Test for zero-shift-guard-ops around funnel shifts. These are used to
- // avoid UB from oversized shifts in raw IR rotate patterns, but the
- // intrinsics do not have that problem.
+ // Test for a bogus zero-shift-guard-op around funnel-shift or rotate.
Value *ShAmt;
auto isFsh = m_CombineOr(m_Intrinsic<Intrinsic::fshl>(m_Value(X), m_Value(),
m_Value(ShAmt)),
m_Intrinsic<Intrinsic::fshr>(m_Value(), m_Value(X),
m_Value(ShAmt)));
- // (ShAmt != 0) ? fshl(X, *, ShAmt) : X --> fshl(X, *, ShAmt)
- // (ShAmt != 0) ? fshr(*, X, ShAmt) : X --> fshr(*, X, ShAmt)
// (ShAmt == 0) ? fshl(X, *, ShAmt) : X --> X
// (ShAmt == 0) ? fshr(*, X, ShAmt) : X --> X
- if (match(TrueVal, isFsh) && FalseVal == X && CmpLHS == ShAmt)
- return Pred == ICmpInst::ICMP_NE ? TrueVal : X;
-
- // (ShAmt == 0) ? X : fshl(X, *, ShAmt) --> fshl(X, *, ShAmt)
- // (ShAmt == 0) ? X : fshr(*, X, ShAmt) --> fshr(*, X, ShAmt)
+ if (match(TrueVal, isFsh) && FalseVal == X && CmpLHS == ShAmt &&
+ Pred == ICmpInst::ICMP_EQ)
+ return X;
// (ShAmt != 0) ? X : fshl(X, *, ShAmt) --> X
// (ShAmt != 0) ? X : fshr(*, X, ShAmt) --> X
- if (match(FalseVal, isFsh) && TrueVal == X && CmpLHS == ShAmt)
- return Pred == ICmpInst::ICMP_EQ ? FalseVal : X;
+ if (match(FalseVal, isFsh) && TrueVal == X && CmpLHS == ShAmt &&
+ Pred == ICmpInst::ICMP_NE)
+ return X;
+
+ // Test for a zero-shift-guard-op around rotates. These are used to
+ // avoid UB from oversized shifts in raw IR rotate patterns, but the
+ // intrinsics do not have that problem.
+ // We do not allow this transform for the general funnel shift case because
+ // that would not preserve the poison safety of the original code.
+ auto isRotate = m_CombineOr(m_Intrinsic<Intrinsic::fshl>(m_Value(X),
+ m_Deferred(X),
+ m_Value(ShAmt)),
+ m_Intrinsic<Intrinsic::fshr>(m_Value(X),
+ m_Deferred(X),
+ m_Value(ShAmt)));
+ // (ShAmt != 0) ? fshl(X, X, ShAmt) : X --> fshl(X, X, ShAmt)
+ // (ShAmt != 0) ? fshr(X, X, ShAmt) : X --> fshr(X, X, ShAmt)
+ if (match(TrueVal, isRotate) && FalseVal == X && CmpLHS == ShAmt &&
+ Pred == ICmpInst::ICMP_NE)
+ return TrueVal;
+ // (ShAmt == 0) ? X : fshl(X, X, ShAmt) --> fshl(X, X, ShAmt)
+ // (ShAmt == 0) ? X : fshr(X, X, ShAmt) --> fshr(X, X, ShAmt)
+ if (match(FalseVal, isRotate) && TrueVal == X && CmpLHS == ShAmt &&
+ Pred == ICmpInst::ICMP_EQ)
+ return FalseVal;
}
// Check for other compares that behave like bit test.
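The equality-guarded rotate select can be dropped because a rotate by zero is the identity, so both select arms agree everywhere. A plain-C++ sketch of that equivalence (rotates only; the general funnel-shift case is excluded above for poison safety):

#include <cstdint>

// fshl(X, X, ShAmt) on i32 is a rotate left by ShAmt modulo 32.
static uint32_t rotl32(uint32_t X, uint32_t ShAmt) {
  ShAmt &= 31;
  return ShAmt ? (X << ShAmt) | (X >> (32 - ShAmt)) : X;
}

// rotl32(X, 0) == X, so the guard changes nothing:
// (ShAmt == 0) ? X : rotl32(X, ShAmt) equals rotl32(X, ShAmt) for all inputs.
static uint32_t guardedRotate(uint32_t X, uint32_t ShAmt) {
  return ShAmt == 0 ? X : rotl32(X, ShAmt);
}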
@@ -4218,6 +3999,17 @@ Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx,
if (isa<UndefValue>(Idx))
return UndefValue::get(Vec->getType());
+ // Inserting an undef scalar? Assume it is the same value as the existing
+ // vector element.
+ if (isa<UndefValue>(Val))
+ return Vec;
+
+ // If we are extracting a value from a vector, then inserting it into the same
+ // place, that's the input vector:
+ // insertelt Vec, (extractelt Vec, Idx), Idx --> Vec
+ if (match(Val, m_ExtractElement(m_Specific(Vec), m_Specific(Idx))))
+ return Vec;
+
return nullptr;
}
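Restated on its own, the insert-of-extract fold is a single pattern match; a minimal sketch using the same matchers as above:

#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// insertelt Vec, (extractelt Vec, Idx), Idx --> Vec: writing back the value
// just read from the same lane leaves the vector unchanged.
static Value *foldInsertOfExtract(Value *Vec, Value *Val, Value *Idx) {
  if (match(Val, m_ExtractElement(m_Specific(Vec), m_Specific(Idx))))
    return Vec;
  return nullptr;
}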
@@ -4495,6 +4287,33 @@ Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit);
}
+static Constant *foldConstant(Instruction::UnaryOps Opcode,
+ Value *&Op, const SimplifyQuery &Q) {
+ if (auto *C = dyn_cast<Constant>(Op))
+ return ConstantFoldUnaryOpOperand(Opcode, C, Q.DL);
+ return nullptr;
+}
+
+/// Given the operand for an FNeg, see if we can fold the result. If not, this
+/// returns null.
+static Value *simplifyFNegInst(Value *Op, FastMathFlags FMF,
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ if (Constant *C = foldConstant(Instruction::FNeg, Op, Q))
+ return C;
+
+ Value *X;
+ // fneg (fneg X) ==> X
+ if (match(Op, m_FNeg(m_Value(X))))
+ return X;
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyFNegInst(Value *Op, FastMathFlags FMF,
+ const SimplifyQuery &Q) {
+ return ::simplifyFNegInst(Op, FMF, Q, RecursionLimit);
+}
+
static Constant *propagateNaN(Constant *In) {
// If the input is a vector with undef elements, just return a default NaN.
if (!In->isNaN())
@@ -4536,16 +4355,22 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
(FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
return Op0;
- // With nnan: (+/-0.0 - X) + X --> 0.0 (and commuted variant)
+ // With nnan: -X + X --> 0.0 (and commuted variant)
// We don't have to explicitly exclude infinities (ninf): INF + -INF == NaN.
// Negative zeros are allowed because we always end up with positive zero:
// X = -0.0: (-0.0 - (-0.0)) + (-0.0) == ( 0.0) + (-0.0) == 0.0
// X = -0.0: ( 0.0 - (-0.0)) + (-0.0) == ( 0.0) + (-0.0) == 0.0
// X = 0.0: (-0.0 - ( 0.0)) + ( 0.0) == (-0.0) + ( 0.0) == 0.0
// X = 0.0: ( 0.0 - ( 0.0)) + ( 0.0) == ( 0.0) + ( 0.0) == 0.0
- if (FMF.noNaNs() && (match(Op0, m_FSub(m_AnyZeroFP(), m_Specific(Op1))) ||
- match(Op1, m_FSub(m_AnyZeroFP(), m_Specific(Op0)))))
- return ConstantFP::getNullValue(Op0->getType());
+ if (FMF.noNaNs()) {
+ if (match(Op0, m_FSub(m_AnyZeroFP(), m_Specific(Op1))) ||
+ match(Op1, m_FSub(m_AnyZeroFP(), m_Specific(Op0))))
+ return ConstantFP::getNullValue(Op0->getType());
+
+ if (match(Op0, m_FNeg(m_Specific(Op1))) ||
+ match(Op1, m_FNeg(m_Specific(Op0))))
+ return ConstantFP::getNullValue(Op0->getType());
+ }
// (X - Y) + Y --> X
// Y + (X - Y) --> X
@@ -4578,14 +4403,17 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
return Op0;
// fsub -0.0, (fsub -0.0, X) ==> X
+ // fsub -0.0, (fneg X) ==> X
Value *X;
if (match(Op0, m_NegZeroFP()) &&
- match(Op1, m_FSub(m_NegZeroFP(), m_Value(X))))
+ match(Op1, m_FNeg(m_Value(X))))
return X;
// fsub 0.0, (fsub 0.0, X) ==> X if signed zeros are ignored.
+ // fsub 0.0, (fneg X) ==> X if signed zeros are ignored.
if (FMF.noSignedZeros() && match(Op0, m_AnyZeroFP()) &&
- match(Op1, m_FSub(m_AnyZeroFP(), m_Value(X))))
+ (match(Op1, m_FSub(m_AnyZeroFP(), m_Value(X))) ||
+ match(Op1, m_FNeg(m_Value(X)))))
return X;
// fsub nnan x, x ==> 0.0
@@ -4722,6 +4550,42 @@ Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
//=== Helper functions for higher up the class hierarchy.
+/// Given the operand for a UnaryOperator, see if we can fold the result.
+/// If not, this returns null.
+static Value *simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q,
+ unsigned MaxRecurse) {
+ switch (Opcode) {
+ case Instruction::FNeg:
+ return simplifyFNegInst(Op, FastMathFlags(), Q, MaxRecurse);
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+}
+
+/// Given the operand for a UnaryOperator, see if we can fold the result.
+/// If not, this returns null.
+/// In contrast to SimplifyUnOp, try to use FastMathFlags when folding the
+/// result. If FastMathFlags are not needed, simply fall back to SimplifyUnOp.
+static Value *simplifyFPUnOp(unsigned Opcode, Value *Op,
+ const FastMathFlags &FMF,
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ switch (Opcode) {
+ case Instruction::FNeg:
+ return simplifyFNegInst(Op, FMF, Q, MaxRecurse);
+ default:
+ return simplifyUnOp(Opcode, Op, Q, MaxRecurse);
+ }
+}
+
+Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q) {
+ return ::simplifyUnOp(Opcode, Op, Q, RecursionLimit);
+}
+
+Value *llvm::SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
+ const SimplifyQuery &Q) {
+ return ::simplifyFPUnOp(Opcode, Op, FMF, Q, RecursionLimit);
+}
+
/// Given operands for a BinaryOperator, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
@@ -4885,22 +4749,6 @@ static Value *SimplifyRelativeLoad(Constant *Ptr, Constant *Offset,
return ConstantExpr::getBitCast(LoadedLHSPtr, Int8PtrTy);
}
-static bool maskIsAllZeroOrUndef(Value *Mask) {
- auto *ConstMask = dyn_cast<Constant>(Mask);
- if (!ConstMask)
- return false;
- if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
- return true;
- for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
- ++I) {
- if (auto *MaskElt = ConstMask->getAggregateElement(I))
- if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
- continue;
- return false;
- }
- return true;
-}
-
static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
const SimplifyQuery &Q) {
// Idempotent functions return the same result when called repeatedly.
@@ -4941,8 +4789,32 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
case Intrinsic::log2:
// log2(exp2(x)) -> x
if (Q.CxtI->hasAllowReassoc() &&
- match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X)))) return X;
+ (match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X))) ||
+ match(Op0, m_Intrinsic<Intrinsic::pow>(m_SpecificFP(2.0),
+ m_Value(X))))) return X;
+ break;
+ case Intrinsic::log10:
+ // log10(pow(10.0, x)) -> x
+ if (Q.CxtI->hasAllowReassoc() &&
+ match(Op0, m_Intrinsic<Intrinsic::pow>(m_SpecificFP(10.0),
+ m_Value(X)))) return X;
break;
+ case Intrinsic::floor:
+ case Intrinsic::trunc:
+ case Intrinsic::ceil:
+ case Intrinsic::round:
+ case Intrinsic::nearbyint:
+ case Intrinsic::rint: {
+ // floor (sitofp x) -> sitofp x
+ // floor (uitofp x) -> uitofp x
+ //
+ // Converting from int always results in a finite integral number or
+ // infinity. For either of those inputs, these rounding functions always
+ // return the same value, so the rounding can be eliminated.
+ if (match(Op0, m_SIToFP(m_Value())) || match(Op0, m_UIToFP(m_Value())))
+ return Op0;
+ break;
+ }
default:
break;
}
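The rounding elimination relies on int-to-FP conversions producing only integral values (or infinity), on which every listed rounding intrinsic is the identity. An illustrative plain-C++ sanity check of that property:

#include <cassert>
#include <cmath>
#include <cstdint>

// static_cast<double>(I) plays the role of sitofp: the result is always an
// integral double, so floor/ceil/trunc/rint all leave it unchanged.
static void checkIntegralAfterConversion(int64_t I) {
  double D = static_cast<double>(I);
  assert(std::floor(D) == D && std::ceil(D) == D &&
         std::trunc(D) == D && std::rint(D) == D);
}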
@@ -4960,16 +4832,19 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
// X - X -> { 0, false }
if (Op0 == Op1)
return Constant::getNullValue(ReturnType);
- // X - undef -> undef
- // undef - X -> undef
- if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
- return UndefValue::get(ReturnType);
- break;
+ LLVM_FALLTHROUGH;
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
- // X + undef -> undef
- if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
- return UndefValue::get(ReturnType);
+ // X - undef -> { undef, false }
+ // undef - X -> { undef, false }
+ // X + undef -> { undef, false }
+ // undef + X -> { undef, false }
+ if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1)) {
+ return ConstantStruct::get(
+ cast<StructType>(ReturnType),
+ {UndefValue::get(ReturnType->getStructElementType(0)),
+ Constant::getNullValue(ReturnType->getStructElementType(1))});
+ }
break;
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
@@ -5085,26 +4960,28 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
return nullptr;
}
-template <typename IterTy>
-static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
- const SimplifyQuery &Q) {
+static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
+
// Intrinsics with no operands have some kind of side effect. Don't simplify.
- unsigned NumOperands = std::distance(ArgBegin, ArgEnd);
- if (NumOperands == 0)
+ unsigned NumOperands = Call->getNumArgOperands();
+ if (!NumOperands)
return nullptr;
+ Function *F = cast<Function>(Call->getCalledFunction());
Intrinsic::ID IID = F->getIntrinsicID();
if (NumOperands == 1)
- return simplifyUnaryIntrinsic(F, ArgBegin[0], Q);
+ return simplifyUnaryIntrinsic(F, Call->getArgOperand(0), Q);
if (NumOperands == 2)
- return simplifyBinaryIntrinsic(F, ArgBegin[0], ArgBegin[1], Q);
+ return simplifyBinaryIntrinsic(F, Call->getArgOperand(0),
+ Call->getArgOperand(1), Q);
// Handle intrinsics with 3 or more arguments.
switch (IID) {
- case Intrinsic::masked_load: {
- Value *MaskArg = ArgBegin[2];
- Value *PassthruArg = ArgBegin[3];
+ case Intrinsic::masked_load:
+ case Intrinsic::masked_gather: {
+ Value *MaskArg = Call->getArgOperand(2);
+ Value *PassthruArg = Call->getArgOperand(3);
// If the mask is all zeros or undef, the "passthru" argument is the result.
if (maskIsAllZeroOrUndef(MaskArg))
return PassthruArg;
@@ -5112,7 +4989,8 @@ static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
}
case Intrinsic::fshl:
case Intrinsic::fshr: {
- Value *Op0 = ArgBegin[0], *Op1 = ArgBegin[1], *ShAmtArg = ArgBegin[2];
+ Value *Op0 = Call->getArgOperand(0), *Op1 = Call->getArgOperand(1),
+ *ShAmtArg = Call->getArgOperand(2);
// If both operands are undef, the result is undef.
if (match(Op0, m_Undef()) && match(Op1, m_Undef()))
@@ -5120,15 +4998,14 @@ static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
// If shift amount is undef, assume it is zero.
if (match(ShAmtArg, m_Undef()))
- return ArgBegin[IID == Intrinsic::fshl ? 0 : 1];
+ return Call->getArgOperand(IID == Intrinsic::fshl ? 0 : 1);
const APInt *ShAmtC;
if (match(ShAmtArg, m_APInt(ShAmtC))) {
// If there's effectively no shift, return the 1st arg or 2nd arg.
- // TODO: For vectors, we could check each element of a non-splat constant.
APInt BitWidth = APInt(ShAmtC->getBitWidth(), ShAmtC->getBitWidth());
if (ShAmtC->urem(BitWidth).isNullValue())
- return ArgBegin[IID == Intrinsic::fshl ? 0 : 1];
+ return Call->getArgOperand(IID == Intrinsic::fshl ? 0 : 1);
}
return nullptr;
}
@@ -5137,58 +5014,36 @@ static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
}
}
-template <typename IterTy>
-static Value *SimplifyCall(ImmutableCallSite CS, Value *V, IterTy ArgBegin,
- IterTy ArgEnd, const SimplifyQuery &Q,
- unsigned MaxRecurse) {
- Type *Ty = V->getType();
- if (PointerType *PTy = dyn_cast<PointerType>(Ty))
- Ty = PTy->getElementType();
- FunctionType *FTy = cast<FunctionType>(Ty);
+Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) {
+ Value *Callee = Call->getCalledValue();
// call undef -> undef
// call null -> undef
- if (isa<UndefValue>(V) || isa<ConstantPointerNull>(V))
- return UndefValue::get(FTy->getReturnType());
+ if (isa<UndefValue>(Callee) || isa<ConstantPointerNull>(Callee))
+ return UndefValue::get(Call->getType());
- Function *F = dyn_cast<Function>(V);
+ Function *F = dyn_cast<Function>(Callee);
if (!F)
return nullptr;
if (F->isIntrinsic())
- if (Value *Ret = simplifyIntrinsic(F, ArgBegin, ArgEnd, Q))
+ if (Value *Ret = simplifyIntrinsic(Call, Q))
return Ret;
- if (!canConstantFoldCallTo(CS, F))
+ if (!canConstantFoldCallTo(Call, F))
return nullptr;
SmallVector<Constant *, 4> ConstantArgs;
- ConstantArgs.reserve(ArgEnd - ArgBegin);
- for (IterTy I = ArgBegin, E = ArgEnd; I != E; ++I) {
- Constant *C = dyn_cast<Constant>(*I);
+ unsigned NumArgs = Call->getNumArgOperands();
+ ConstantArgs.reserve(NumArgs);
+ for (auto &Arg : Call->args()) {
+ Constant *C = dyn_cast<Constant>(&Arg);
if (!C)
return nullptr;
ConstantArgs.push_back(C);
}
- return ConstantFoldCall(CS, F, ConstantArgs, Q.TLI);
-}
-
-Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V,
- User::op_iterator ArgBegin, User::op_iterator ArgEnd,
- const SimplifyQuery &Q) {
- return ::SimplifyCall(CS, V, ArgBegin, ArgEnd, Q, RecursionLimit);
-}
-
-Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V,
- ArrayRef<Value *> Args, const SimplifyQuery &Q) {
- return ::SimplifyCall(CS, V, Args.begin(), Args.end(), Q, RecursionLimit);
-}
-
-Value *llvm::SimplifyCall(ImmutableCallSite ICS, const SimplifyQuery &Q) {
- CallSite CS(const_cast<Instruction*>(ICS.getInstruction()));
- return ::SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
- Q, RecursionLimit);
+ return ConstantFoldCall(Call, F, ConstantArgs, Q.TLI);
}
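With the call instruction passed directly, callers no longer thread a callee and argument iterators through. A hedged usage sketch of the new entry point (helper name hypothetical):

#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Try to fold a call in place; SQ is a SimplifyQuery prepared by the caller.
static bool trySimplifyCall(CallInst *CI, const SimplifyQuery &SQ) {
  if (Value *V = SimplifyCall(CI, SQ.getWithInstruction(CI))) {
    CI->replaceAllUsesWith(V);
    return true;
  }
  return false;
}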
/// See if we can compute a simplified version of this instruction.
@@ -5203,6 +5058,9 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
default:
Result = ConstantFoldInstruction(I, Q.DL, Q.TLI);
break;
+ case Instruction::FNeg:
+ Result = SimplifyFNegInst(I->getOperand(0), I->getFastMathFlags(), Q);
+ break;
case Instruction::FAdd:
Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1),
I->getFastMathFlags(), Q);
@@ -5327,8 +5185,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
Result = SimplifyPHINode(cast<PHINode>(I), Q);
break;
case Instruction::Call: {
- CallSite CS(cast<CallInst>(I));
- Result = SimplifyCall(CS, Q);
+ Result = SimplifyCall(cast<CallInst>(I), Q);
break;
}
#define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc:
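Together with the FNeg case added to the SimplifyInstruction dispatch, a driver loop over a function now also folds unary FP negations. A sketch, assuming the standard instruction iterator (helper name hypothetical):

#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
using namespace llvm;

// Replace each simplifiable instruction (now including fneg) with its folded
// value; dead originals are left for a later cleanup pass.
static void simplifyAll(Function &F, const SimplifyQuery &SQ) {
  for (Instruction &I : instructions(F))
    if (Value *V = SimplifyInstruction(&I, SQ.getWithInstruction(&I)))
      I.replaceAllUsesWith(V);
}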
diff --git a/lib/Analysis/Interval.cpp b/lib/Analysis/Interval.cpp
index 6d5de22cb93f..07d6e27c13be 100644
--- a/lib/Analysis/Interval.cpp
+++ b/lib/Analysis/Interval.cpp
@@ -1,9 +1,8 @@
//===- Interval.cpp - Interval class code ---------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/IntervalPartition.cpp b/lib/Analysis/IntervalPartition.cpp
index c777d91b67c6..d12db010db6a 100644
--- a/lib/Analysis/IntervalPartition.cpp
+++ b/lib/Analysis/IntervalPartition.cpp
@@ -1,9 +1,8 @@
//===- IntervalPartition.cpp - Interval Partition module code -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/IteratedDominanceFrontier.cpp b/lib/Analysis/IteratedDominanceFrontier.cpp
deleted file mode 100644
index 000fe5ddad54..000000000000
--- a/lib/Analysis/IteratedDominanceFrontier.cpp
+++ /dev/null
@@ -1,110 +0,0 @@
-//===- IteratedDominanceFrontier.cpp - Compute IDF ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Compute iterated dominance frontiers using a linear time algorithm.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/IteratedDominanceFrontier.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Dominators.h"
-#include <queue>
-
-namespace llvm {
-
-template <class NodeTy, bool IsPostDom>
-void IDFCalculator<NodeTy, IsPostDom>::calculate(
- SmallVectorImpl<BasicBlock *> &PHIBlocks) {
- // Use a priority queue keyed on dominator tree level so that inserted nodes
- // are handled from the bottom of the dominator tree upwards. We also augment
- // the level with a DFS number to ensure that the blocks are ordered in a
- // deterministic way.
- typedef std::pair<DomTreeNode *, std::pair<unsigned, unsigned>>
- DomTreeNodePair;
- typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
- less_second> IDFPriorityQueue;
- IDFPriorityQueue PQ;
-
- DT.updateDFSNumbers();
-
- for (BasicBlock *BB : *DefBlocks) {
- if (DomTreeNode *Node = DT.getNode(BB))
- PQ.push({Node, std::make_pair(Node->getLevel(), Node->getDFSNumIn())});
- }
-
- SmallVector<DomTreeNode *, 32> Worklist;
- SmallPtrSet<DomTreeNode *, 32> VisitedPQ;
- SmallPtrSet<DomTreeNode *, 32> VisitedWorklist;
-
- while (!PQ.empty()) {
- DomTreeNodePair RootPair = PQ.top();
- PQ.pop();
- DomTreeNode *Root = RootPair.first;
- unsigned RootLevel = RootPair.second.first;
-
- // Walk all dominator tree children of Root, inspecting their CFG edges with
- // targets elsewhere on the dominator tree. Only targets whose level is at
- // most Root's level are added to the iterated dominance frontier of the
- // definition set.
-
- Worklist.clear();
- Worklist.push_back(Root);
- VisitedWorklist.insert(Root);
-
- while (!Worklist.empty()) {
- DomTreeNode *Node = Worklist.pop_back_val();
- BasicBlock *BB = Node->getBlock();
- // Succ is the successor in the direction we are calculating IDF, so it is
- // successor for IDF, and predecessor for Reverse IDF.
- auto DoWork = [&](BasicBlock *Succ) {
- DomTreeNode *SuccNode = DT.getNode(Succ);
-
- // Quickly skip all CFG edges that are also dominator tree edges instead
- // of catching them below.
- if (SuccNode->getIDom() == Node)
- return;
-
- const unsigned SuccLevel = SuccNode->getLevel();
- if (SuccLevel > RootLevel)
- return;
-
- if (!VisitedPQ.insert(SuccNode).second)
- return;
-
- BasicBlock *SuccBB = SuccNode->getBlock();
- if (useLiveIn && !LiveInBlocks->count(SuccBB))
- return;
-
- PHIBlocks.emplace_back(SuccBB);
- if (!DefBlocks->count(SuccBB))
- PQ.push(std::make_pair(
- SuccNode, std::make_pair(SuccLevel, SuccNode->getDFSNumIn())));
- };
-
- if (GD) {
- for (auto Pair : children<
- std::pair<const GraphDiff<BasicBlock *, IsPostDom> *, NodeTy>>(
- {GD, BB}))
- DoWork(Pair.second);
- } else {
- for (auto *Succ : children<NodeTy>(BB))
- DoWork(Succ);
- }
-
- for (auto DomChild : *Node) {
- if (VisitedWorklist.insert(DomChild).second)
- Worklist.push_back(DomChild);
- }
- }
- }
-}
-
-template class IDFCalculator<BasicBlock *, false>;
-template class IDFCalculator<Inverse<BasicBlock *>, true>;
-}
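Nothing in this diff shows where the calculator moved, so that is left unstated; its central trick, though, is the priority queue keyed on (dominator-tree level, DFS-in number), so deeper nodes are processed first with deterministic tie-breaking. A reduced sketch of that ordering:

#include <queue>
#include <utility>
#include <vector>

// Keys are (tree level, DFS-in number); the max-heap pops the deepest node
// first, and DFS numbers break ties deterministically.
using Key = std::pair<unsigned, unsigned>;
using Entry = std::pair<int, Key>; // int stands in for a dom-tree node id

struct LessSecond {
  bool operator()(const Entry &A, const Entry &B) const {
    return A.second < B.second;
  }
};
using IDFQueue = std::priority_queue<Entry, std::vector<Entry>, LessSecond>;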
diff --git a/lib/Analysis/LazyBlockFrequencyInfo.cpp b/lib/Analysis/LazyBlockFrequencyInfo.cpp
index 93c23bca96af..439758560284 100644
--- a/lib/Analysis/LazyBlockFrequencyInfo.cpp
+++ b/lib/Analysis/LazyBlockFrequencyInfo.cpp
@@ -1,9 +1,8 @@
//===- LazyBlockFrequencyInfo.cpp - Lazy Block Frequency Analysis ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/LazyBranchProbabilityInfo.cpp b/lib/Analysis/LazyBranchProbabilityInfo.cpp
index 429b78c3a47e..f2592c26b373 100644
--- a/lib/Analysis/LazyBranchProbabilityInfo.cpp
+++ b/lib/Analysis/LazyBranchProbabilityInfo.cpp
@@ -1,9 +1,8 @@
//===- LazyBranchProbabilityInfo.cpp - Lazy Branch Probability Analysis ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp
index 3f22ada803c9..797fcf516429 100644
--- a/lib/Analysis/LazyCallGraph.cpp
+++ b/lib/Analysis/LazyCallGraph.cpp
@@ -1,9 +1,8 @@
//===- LazyCallGraph.cpp - Analysis of a Module's call graph --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -173,6 +172,19 @@ LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) {
addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(F), Edge::Ref);
}
+ // Externally visible aliases of internal functions are also viable entry
+ // edges to the module.
+ for (auto &A : M.aliases()) {
+ if (A.hasLocalLinkage())
+ continue;
+ if (Function *F = dyn_cast<Function>(A.getAliasee())) {
+ LLVM_DEBUG(dbgs() << " Adding '" << F->getName()
+ << "' with alias '" << A.getName()
+ << "' to entry set of the graph.\n");
+ addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(*F), Edge::Ref);
+ }
+ }
+
// Now add entry nodes for functions reachable via initializers to globals.
SmallVector<Constant *, 16> Worklist;
SmallPtrSet<Constant *, 16> Visited;
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 110c085d3f35..542ff709d475 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -1,9 +1,8 @@
//===- LazyValueInfo.cpp - Value constraint analysis ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -423,10 +422,18 @@ namespace {
BasicBlock *BB);
Optional<ConstantRange> getRangeForOperand(unsigned Op, Instruction *I,
BasicBlock *BB);
+ bool solveBlockValueBinaryOpImpl(
+ ValueLatticeElement &BBLV, Instruction *I, BasicBlock *BB,
+ std::function<ConstantRange(const ConstantRange &,
+ const ConstantRange &)> OpFn);
bool solveBlockValueBinaryOp(ValueLatticeElement &BBLV, BinaryOperator *BBI,
BasicBlock *BB);
bool solveBlockValueCast(ValueLatticeElement &BBLV, CastInst *CI,
BasicBlock *BB);
+ bool solveBlockValueOverflowIntrinsic(
+ ValueLatticeElement &BBLV, WithOverflowInst *WO, BasicBlock *BB);
+ bool solveBlockValueIntrinsic(ValueLatticeElement &BBLV, IntrinsicInst *II,
+ BasicBlock *BB);
void intersectAssumeOrGuardBlockValueConstantRange(Value *Val,
ValueLatticeElement &BBLV,
Instruction *BBI);
@@ -625,7 +632,7 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res,
// and the like to prove non-nullness, but it's not clear that's worth it
// compile time wise. The context-insensitive value walk done inside
// isKnownNonZero gets most of the profitable cases at much less expense.
- // This does mean that we have a sensativity to where the defining
+ // This does mean that we have a sensitivity to where the defining
// instruction is placed, even if it could legally be hoisted much higher.
// That is unfortunate.
PointerType *PT = dyn_cast<PointerType>(BBI->getType());
@@ -639,6 +646,14 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res,
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI))
return solveBlockValueBinaryOp(Res, BO, BB);
+
+ if (auto *EVI = dyn_cast<ExtractValueInst>(BBI))
+ if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand()))
+ if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 0)
+ return solveBlockValueOverflowIntrinsic(Res, WO, BB);
+
+ if (auto *II = dyn_cast<IntrinsicInst>(BBI))
+ return solveBlockValueIntrinsic(Res, II, BB);
}
LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
@@ -824,7 +839,9 @@ void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange(
if (!GuardDecl || GuardDecl->use_empty())
return;
- for (Instruction &I : make_range(BBI->getIterator().getReverse(),
+ if (BBI->getIterator() == BBI->getParent()->begin())
+ return;
+ for (Instruction &I : make_range(std::next(BBI->getIterator().getReverse()),
BBI->getParent()->rend())) {
Value *Cond = nullptr;
if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(Cond))))
@@ -892,7 +909,28 @@ bool LazyValueInfoImpl::solveBlockValueSelect(ValueLatticeElement &BBLV,
return true;
}
- // TODO: ABS, NABS from the SelectPatternResult
+ if (SPR.Flavor == SPF_ABS) {
+ if (LHS == SI->getTrueValue()) {
+ BBLV = ValueLatticeElement::getRange(TrueCR.abs());
+ return true;
+ }
+ if (LHS == SI->getFalseValue()) {
+ BBLV = ValueLatticeElement::getRange(FalseCR.abs());
+ return true;
+ }
+ }
+
+ if (SPR.Flavor == SPF_NABS) {
+ ConstantRange Zero(APInt::getNullValue(TrueCR.getBitWidth()));
+ if (LHS == SI->getTrueValue()) {
+ BBLV = ValueLatticeElement::getRange(Zero.sub(TrueCR.abs()));
+ return true;
+ }
+ if (LHS == SI->getFalseValue()) {
+ BBLV = ValueLatticeElement::getRange(Zero.sub(FalseCR.abs()));
+ return true;
+ }
+ }
}
// Can we constrain the facts about the true and false values by using the
@@ -962,7 +1000,7 @@ Optional<ConstantRange> LazyValueInfoImpl::getRangeForOperand(unsigned Op,
const unsigned OperandBitWidth =
DL.getTypeSizeInBits(I->getOperand(Op)->getType());
- ConstantRange Range = ConstantRange(OperandBitWidth);
+ ConstantRange Range = ConstantRange::getFull(OperandBitWidth);
if (hasBlockValue(I->getOperand(Op), BB)) {
ValueLatticeElement Val = getBlockValue(I->getOperand(Op), BB);
intersectAssumeOrGuardBlockValueConstantRange(I->getOperand(Op), Val, I);
@@ -1018,56 +1056,83 @@ bool LazyValueInfoImpl::solveBlockValueCast(ValueLatticeElement &BBLV,
return true;
}
+bool LazyValueInfoImpl::solveBlockValueBinaryOpImpl(
+ ValueLatticeElement &BBLV, Instruction *I, BasicBlock *BB,
+ std::function<ConstantRange(const ConstantRange &,
+ const ConstantRange &)> OpFn) {
+ // Figure out the ranges of the operands. If that fails, use a
+ // conservative range, but apply the transfer rule anyway. This
+ // lets us pick up facts from expressions like "and i32 (call i32
+ // @foo()), 32"
+ Optional<ConstantRange> LHSRes = getRangeForOperand(0, I, BB);
+ Optional<ConstantRange> RHSRes = getRangeForOperand(1, I, BB);
+ if (!LHSRes.hasValue() || !RHSRes.hasValue())
+ // More work to do before applying this transfer rule.
+ return false;
+
+ ConstantRange LHSRange = LHSRes.getValue();
+ ConstantRange RHSRange = RHSRes.getValue();
+ BBLV = ValueLatticeElement::getRange(OpFn(LHSRange, RHSRange));
+ return true;
+}
+
bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV,
BinaryOperator *BO,
BasicBlock *BB) {
assert(BO->getOperand(0)->getType()->isSized() &&
"all operands to binary operators are sized");
-
- // Filter out operators we don't know how to reason about before attempting to
- // recurse on our operand(s). This can cut a long search short if we know
- // we're not going to be able to get any useful information anyways.
- switch (BO->getOpcode()) {
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- case Instruction::UDiv:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- // continue into the code below
- break;
- default:
- // Unhandled instructions are overdefined.
+ if (BO->getOpcode() == Instruction::Xor) {
+ // Xor is the only operation not supported by ConstantRange::binaryOp().
LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined (unknown binary operator).\n");
BBLV = ValueLatticeElement::getOverdefined();
return true;
- };
-
- // Figure out the ranges of the operands. If that fails, use a
- // conservative range, but apply the transfer rule anyways. This
- // lets us pick up facts from expressions like "and i32 (call i32
- // @foo()), 32"
- Optional<ConstantRange> LHSRes = getRangeForOperand(0, BO, BB);
- Optional<ConstantRange> RHSRes = getRangeForOperand(1, BO, BB);
+ }
- if (!LHSRes.hasValue() || !RHSRes.hasValue())
- // More work to do before applying this transfer rule.
- return false;
+ return solveBlockValueBinaryOpImpl(BBLV, BO, BB,
+ [BO](const ConstantRange &CR1, const ConstantRange &CR2) {
+ return CR1.binaryOp(BO->getOpcode(), CR2);
+ });
+}
- ConstantRange LHSRange = LHSRes.getValue();
- ConstantRange RHSRange = RHSRes.getValue();
+bool LazyValueInfoImpl::solveBlockValueOverflowIntrinsic(
+ ValueLatticeElement &BBLV, WithOverflowInst *WO, BasicBlock *BB) {
+ return solveBlockValueBinaryOpImpl(BBLV, WO, BB,
+ [WO](const ConstantRange &CR1, const ConstantRange &CR2) {
+ return CR1.binaryOp(WO->getBinaryOp(), CR2);
+ });
+}
- // NOTE: We're currently limited by the set of operations that ConstantRange
- // can evaluate symbolically. Enhancing that set will allows us to analyze
- // more definitions.
- Instruction::BinaryOps BinOp = BO->getOpcode();
- BBLV = ValueLatticeElement::getRange(LHSRange.binaryOp(BinOp, RHSRange));
- return true;
+bool LazyValueInfoImpl::solveBlockValueIntrinsic(
+ ValueLatticeElement &BBLV, IntrinsicInst *II, BasicBlock *BB) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::uadd_sat:
+ return solveBlockValueBinaryOpImpl(BBLV, II, BB,
+ [](const ConstantRange &CR1, const ConstantRange &CR2) {
+ return CR1.uadd_sat(CR2);
+ });
+ case Intrinsic::usub_sat:
+ return solveBlockValueBinaryOpImpl(BBLV, II, BB,
+ [](const ConstantRange &CR1, const ConstantRange &CR2) {
+ return CR1.usub_sat(CR2);
+ });
+ case Intrinsic::sadd_sat:
+ return solveBlockValueBinaryOpImpl(BBLV, II, BB,
+ [](const ConstantRange &CR1, const ConstantRange &CR2) {
+ return CR1.sadd_sat(CR2);
+ });
+ case Intrinsic::ssub_sat:
+ return solveBlockValueBinaryOpImpl(BBLV, II, BB,
+ [](const ConstantRange &CR1, const ConstantRange &CR2) {
+ return CR1.ssub_sat(CR2);
+ });
+ default:
+ LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined (unknown intrinsic).\n");
+ BBLV = ValueLatticeElement::getOverdefined();
+ return true;
+ }
}
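Each new case defers to the matching ConstantRange transfer function. A worked instance for the unsigned saturating add, with made-up ranges:

#include "llvm/IR/ConstantRange.h"
using namespace llvm;

// On i8, [250, 255] uadd_sat {10} clamps every element at 255, so the
// resulting range collapses to the single value 255.
static ConstantRange saturatedAddDemo() {
  ConstantRange X(APInt(8, 250), APInt(8, 0));   // wrapped encoding of [250, 255]
  ConstantRange Ten(APInt(8, 10), APInt(8, 11)); // the singleton {10}
  return X.uadd_sat(Ten);                        // {255}
}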
static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
@@ -1133,6 +1198,28 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
return ValueLatticeElement::getOverdefined();
}
+// Handle conditions of the form
+// extractvalue(op.with.overflow(%x, C), 1).
+static ValueLatticeElement getValueFromOverflowCondition(
+ Value *Val, WithOverflowInst *WO, bool IsTrueDest) {
+ // TODO: This only works with a constant RHS for now. We could also compute
+ // the range of the RHS, but this doesn't fit into the current structure of
+ // the edge value calculation.
+ const APInt *C;
+ if (WO->getLHS() != Val || !match(WO->getRHS(), m_APInt(C)))
+ return ValueLatticeElement::getOverdefined();
+
+ // Calculate the possible values of %x for which no overflow occurs.
+ ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
+ WO->getBinaryOp(), *C, WO->getNoWrapKind());
+
+ // If overflow is false, %x is constrained to NWR. If overflow is true, %x is
+ // constrained to it's inverse (all values that might cause overflow).
+ if (IsTrueDest)
+ NWR = NWR.inverse();
+ return ValueLatticeElement::getRange(NWR);
+}
+
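Concretely, for 'uadd.with.overflow(%x, 8)' on i8 the no-wrap region is every %x with %x + 8 <= 255, i.e. [0, 248), and the overflow==true edge gets the inverse. A sketch of just the region computation, values made up:

#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Operator.h"
using namespace llvm;

// All i8 values of %x for which 'add %x, 8' cannot wrap unsigned: [0, 248).
static ConstantRange uaddNoOverflowRegion() {
  return ConstantRange::makeExactNoWrapRegion(
      Instruction::Add, APInt(8, 8),
      OverflowingBinaryOperator::NoUnsignedWrap);
}
// On the overflow branch the value lies in the inverse range, [248, 0).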
static ValueLatticeElement
getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest,
DenseMap<Value*, ValueLatticeElement> &Visited);
@@ -1143,6 +1230,11 @@ getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest,
if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cond))
return getValueFromICmpCondition(Val, ICI, isTrueDest);
+ if (auto *EVI = dyn_cast<ExtractValueInst>(Cond))
+ if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand()))
+ if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 1)
+ return getValueFromOverflowCondition(Val, WO, isTrueDest);
+
// Handle conditions in the form of (cond1 && cond2), we know that on the
// true dest path both of the conditions hold. Similarly for conditions of
// the form (cond1 || cond2), we know that on the false dest path neither
@@ -1575,14 +1667,14 @@ ConstantRange LazyValueInfo::getConstantRange(Value *V, BasicBlock *BB,
ValueLatticeElement Result =
getImpl(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI);
if (Result.isUndefined())
- return ConstantRange(Width, /*isFullSet=*/false);
+ return ConstantRange::getEmpty(Width);
if (Result.isConstantRange())
return Result.getConstantRange();
// We represent ConstantInt constants as constant ranges but other kinds
// of integer constants, i.e. ConstantExpr will be tagged as constants
assert(!(Result.isConstant() && isa<ConstantInt>(Result.getConstant())) &&
"ConstantInt value must be represented as constantrange");
- return ConstantRange(Width, /*isFullSet=*/true);
+ return ConstantRange::getFull(Width);
}
/// Determine whether the specified value is known to be a
@@ -1614,14 +1706,14 @@ ConstantRange LazyValueInfo::getConstantRangeOnEdge(Value *V,
getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
if (Result.isUndefined())
- return ConstantRange(Width, /*isFullSet=*/false);
+ return ConstantRange::getEmpty(Width);
if (Result.isConstantRange())
return Result.getConstantRange();
// We represent ConstantInt constants as constant ranges but other kinds
// of integer constants, i.e. ConstantExpr will be tagged as constants
assert(!(Result.isConstant() && isa<ConstantInt>(Result.getConstant())) &&
"ConstantInt value must be represented as constantrange");
- return ConstantRange(Width, /*isFullSet=*/true);
+ return ConstantRange::getFull(Width);
}
static LazyValueInfo::Tristate
@@ -1711,7 +1803,7 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
// through would still be correct.
const DataLayout &DL = CxtI->getModule()->getDataLayout();
if (V->getType()->isPointerTy() && C->isNullValue() &&
- isKnownNonZero(V->stripPointerCasts(), DL)) {
+ isKnownNonZero(V->stripPointerCastsSameRepresentation(), DL)) {
if (Pred == ICmpInst::ICMP_EQ)
return LazyValueInfo::False;
else if (Pred == ICmpInst::ICMP_NE)
diff --git a/lib/Analysis/LegacyDivergenceAnalysis.cpp b/lib/Analysis/LegacyDivergenceAnalysis.cpp
index 5540859ebdda..52212e1c42aa 100644
--- a/lib/Analysis/LegacyDivergenceAnalysis.cpp
+++ b/lib/Analysis/LegacyDivergenceAnalysis.cpp
@@ -1,10 +1,9 @@
//===- LegacyDivergenceAnalysis.cpp --------- Legacy Divergence Analysis
//Implementation -==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 5d0a627f8426..d28b8a189d4b 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -1,9 +1,8 @@
//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -268,10 +267,14 @@ void Lint::visitCallSite(CallSite CS) {
if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) {
AttributeList PAL = CS.getAttributes();
unsigned ArgNo = 0;
- for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) {
+ for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE;
+ ++BI, ++ArgNo) {
// Skip ByVal arguments since they will be memcpy'd to the callee's
// stack so we're not really passing the pointer anyway.
- if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal))
+ if (PAL.hasParamAttribute(ArgNo, Attribute::ByVal))
+ continue;
+ // If both arguments are readonly, they have no dependence.
+ if (Formal->onlyReadsMemory() && CS.onlyReadsMemory(ArgNo))
continue;
if (AI != BI && (*BI)->getType()->isPointerTy()) {
AliasResult Result = AA->alias(*AI, *BI);
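// [Editor's note] A hedged sketch of the attribute queries behind the new
// skip above: when both the formal parameter and the call-site operand are
// readonly, two aliasing pointer arguments can only be read, so there is no
// write for a noalias violation to corrupt. Formal/CS/ArgNo mirror the names
// used in the hunk.
#include "llvm/IR/Argument.h"
#include "llvm/IR/CallSite.h"
static bool onlyReadDependence(const llvm::Argument *Formal, llvm::CallSite CS,
                               unsigned ArgNo) {
  return Formal->onlyReadsMemory() && CS.onlyReadsMemory(ArgNo);
}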
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 8129795bc0c1..31da4e9ec783 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -1,9 +1,8 @@
//===- Loads.cpp - Local load analysis ------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -126,7 +125,8 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
Visited);
}
-bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
+bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
+ unsigned Align,
const DataLayout &DL,
const Instruction *CtxI,
const DominatorTree *DT) {
@@ -134,8 +134,6 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
// attribute, we know exactly how many bytes are dereferenceable. If we can
// determine the exact offset to the attributed variable, we can use that
// information here.
- Type *VTy = V->getType();
- Type *Ty = VTy->getPointerElementType();
// Require ABI alignment for loads without alignment specification
if (Align == 0)
@@ -146,14 +144,16 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
SmallPtrSet<const Value *, 32> Visited;
return ::isDereferenceableAndAlignedPointer(
- V, Align, APInt(DL.getIndexTypeSizeInBits(VTy), DL.getTypeStoreSize(Ty)), DL,
- CtxI, DT, Visited);
+ V, Align,
+ APInt(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty)),
+ DL, CtxI, DT, Visited);
}
-bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL,
+bool llvm::isDereferenceablePointer(const Value *V, Type *Ty,
+ const DataLayout &DL,
const Instruction *CtxI,
const DominatorTree *DT) {
- return isDereferenceableAndAlignedPointer(V, 1, DL, CtxI, DT);
+ return isDereferenceableAndAlignedPointer(V, Ty, 1, DL, CtxI, DT);
}
/// Test if A and B will obviously have the same value.
@@ -198,7 +198,7 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
///
/// This uses the explicit Size argument to determine how many bytes need to
/// be safe to load from the pointer.
-bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align,
+bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
const DataLayout &DL,
Instruction *ScanFrom,
const DominatorTree *DT) {
@@ -209,7 +209,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align,
// If DT is not specified we can't make context-sensitive query
const Instruction* CtxI = DT ? ScanFrom : nullptr;
- if (isDereferenceableAndAlignedPointer(V, Align, DL, CtxI, DT))
+ if (isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT))
return true;
int64_t ByteOffset = 0;
@@ -281,9 +281,17 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align,
Value *AccessedPtr;
unsigned AccessedAlign;
if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ // Ignore volatile loads. The execution of a volatile load cannot
+ // be used to prove an address is backed by regular memory; it can,
+ // for example, point to an MMIO register.
+ if (LI->isVolatile())
+ continue;
AccessedPtr = LI->getPointerOperand();
AccessedAlign = LI->getAlignment();
} else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ // Ignore volatile stores (see comment for loads).
+ if (SI->isVolatile())
+ continue;
AccessedPtr = SI->getPointerOperand();
AccessedAlign = SI->getAlignment();
} else
@@ -306,7 +314,15 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align,
return false;
}
-/// DefMaxInstsToScan - the default number of maximum instructions
+bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align,
+ const DataLayout &DL,
+ Instruction *ScanFrom,
+ const DominatorTree *DT) {
+ APInt Size(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty));
+ return isSafeToLoadUnconditionally(V, Align, Size, DL, ScanFrom, DT);
+}
+
+/// DefMaxInstsToScan - the default maximum number of instructions
/// to scan in the block, used by FindAvailableLoadedValue().
/// FindAvailableLoadedValue() was introduced in r60148, to improve jump
/// threading in part by eliminating partially redundant loads.
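// [Editor's note] A minimal usage sketch of the type-aware overload added
// above: callers now pass the load's result type instead of relying on the
// pointer's pointee type. Assumes an in-scope LoadInst and an optional
// DominatorTree.
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/Instructions.h"
static bool loadIsSafe(llvm::LoadInst *LI, const llvm::DominatorTree *DT) {
  const llvm::DataLayout &DL = LI->getModule()->getDataLayout();
  return llvm::isSafeToLoadUnconditionally(LI->getPointerOperand(),
                                           LI->getType(), LI->getAlignment(),
                                           DL, /*ScanFrom=*/LI, DT);
}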
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index 7f3480f512ab..36bd9a8b7ea7 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1,9 +1,8 @@
//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -843,7 +842,7 @@ void AccessAnalysis::processMemAccesses() {
bool SetHasWrite = false;
// Map of pointers to last access encountered.
- typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
+ typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap;
UnderlyingObjToAccessMap ObjToLastAccess;
// Set of access to check after all writes have been processed.
@@ -904,13 +903,13 @@ void AccessAnalysis::processMemAccesses() {
// Create sets of pointers connected by a shared alias set and
// underlying object.
- typedef SmallVector<Value *, 16> ValueVector;
+ typedef SmallVector<const Value *, 16> ValueVector;
ValueVector TempObjects;
GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
LLVM_DEBUG(dbgs()
<< "Underlying objects for pointer " << *Ptr << "\n");
- for (Value *UnderlyingObj : TempObjects) {
+ for (const Value *UnderlyingObj : TempObjects) {
// nullptr never aliases; don't join sets for pointers that have "null"
// in their UnderlyingObjects list.
if (isa<ConstantPointerNull>(UnderlyingObj) &&
@@ -1014,7 +1013,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
return 0;
}
- // The accesss function must stride over the innermost loop.
+ // The access function must stride over the innermost loop.
if (Lp != AR->getLoop()) {
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
<< *Ptr << " SCEV: " << *AR << "\n");
@@ -1086,7 +1085,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
if (Assume) {
// We can avoid this case by adding a run-time check.
LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
- << "inbouds or in address space 0 may wrap:\n"
+ << "inbounds or in address space 0 may wrap:\n"
<< "LAA: Pointer: " << *Ptr << "\n"
<< "LAA: SCEV: " << *AR << "\n"
<< "LAA: Added an overflow assumption\n");
@@ -1145,10 +1144,9 @@ bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
std::iota(SortedIndices.begin(), SortedIndices.end(), 0);
// Sort the memory accesses and keep the order of their uses in UseOrder.
- std::stable_sort(SortedIndices.begin(), SortedIndices.end(),
- [&OffValPairs](unsigned Left, unsigned Right) {
- return OffValPairs[Left].first < OffValPairs[Right].first;
- });
+ llvm::stable_sort(SortedIndices, [&](unsigned Left, unsigned Right) {
+ return OffValPairs[Left].first < OffValPairs[Right].first;
+ });
// Check if the order is consecutive already.
if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) {
@@ -1346,7 +1344,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
// where Step is the absolute stride of the memory accesses in bytes,
// then there is no dependence.
//
- // Ratioanle:
+ // Rationale:
// We basically want to check if the absolute distance (|Dist/Step|)
// is >= the loop iteration count (or > BackedgeTakenCount).
// This is equivalent to the Strong SIV Test (Practical Dependence Testing,
@@ -1369,7 +1367,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
// The dependence distance can be positive/negative, so we sign extend Dist;
// The multiplication of the absolute stride in bytes and the
- // backdgeTakenCount is non-negative, so we zero extend Product.
+ // backedgeTakenCount is non-negative, so we zero extend Product.
if (DistTypeSize > ProductTypeSize)
CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
else
@@ -1780,6 +1778,11 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
unsigned NumReads = 0;
unsigned NumReadWrites = 0;
+ bool HasComplexMemInst = false;
+
+ // A runtime check is only legal to insert if there are no convergent calls.
+ HasConvergentOp = false;
+
PtrRtChecking->Pointers.clear();
PtrRtChecking->Need = false;
@@ -1787,8 +1790,25 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
// For each block.
for (BasicBlock *BB : TheLoop->blocks()) {
- // Scan the BB and collect legal loads and stores.
+ // Scan the BB and collect legal loads and stores. Also detect any
+ // convergent instructions.
for (Instruction &I : *BB) {
+ if (auto *Call = dyn_cast<CallBase>(&I)) {
+ if (Call->isConvergent())
+ HasConvergentOp = true;
+ }
+
+ // If both a non-vectorizable memory instruction and a convergent operation
+ // have been found in this loop, there is no reason to continue the search.
+ if (HasComplexMemInst && HasConvergentOp) {
+ CanVecMem = false;
+ return;
+ }
+
+ // Avoid hitting recordAnalysis multiple times.
+ if (HasComplexMemInst)
+ continue;
+
// If this is a load, save it. If this instruction can read from memory
// but is not a load, then we quit. Notice that we don't handle function
// calls that read or write.
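// [Editor's note] A hedged sketch of the convergent-call detection the hunk
// above adds: convergent operations (e.g. GPU barriers) may not acquire new
// control dependences, so a loop containing one cannot be guarded by an
// inserted runtime check.
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/InstrTypes.h"
static bool loopHasConvergentOp(const llvm::Loop &L) {
  for (llvm::BasicBlock *BB : L.blocks())
    for (llvm::Instruction &I : *BB)
      if (auto *Call = llvm::dyn_cast<llvm::CallBase>(&I))
        if (Call->isConvergent())
          return true;
  return false;
}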
@@ -1807,12 +1827,18 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
continue;
auto *Ld = dyn_cast<LoadInst>(&I);
- if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
+ if (!Ld) {
+ recordAnalysis("CantVectorizeInstruction", Ld)
+ << "instruction cannot be vectorized";
+ HasComplexMemInst = true;
+ continue;
+ }
+ if (!Ld->isSimple() && !IsAnnotatedParallel) {
recordAnalysis("NonSimpleLoad", Ld)
<< "read with atomic ordering or volatile read";
LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
- CanVecMem = false;
- return;
+ HasComplexMemInst = true;
+ continue;
}
NumLoads++;
Loads.push_back(Ld);
@@ -1828,15 +1854,15 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
if (!St) {
recordAnalysis("CantVectorizeInstruction", St)
<< "instruction cannot be vectorized";
- CanVecMem = false;
- return;
+ HasComplexMemInst = true;
+ continue;
}
if (!St->isSimple() && !IsAnnotatedParallel) {
recordAnalysis("NonSimpleStore", St)
<< "write with atomic ordering or volatile write";
LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
- CanVecMem = false;
- return;
+ HasComplexMemInst = true;
+ continue;
}
NumStores++;
Stores.push_back(St);
@@ -1847,6 +1873,11 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
} // Next instr.
} // Next block.
+ if (HasComplexMemInst) {
+ CanVecMem = false;
+ return;
+ }
+
// Now we have two lists that hold the loads and the stores.
// Next, we find the pointers that they use.
@@ -1964,7 +1995,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
}
LLVM_DEBUG(
- dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
+ dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
CanVecMem = true;
if (Accesses.isDependencyCheckNeeded()) {
@@ -1999,6 +2030,15 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
}
}
+ if (HasConvergentOp) {
+ recordAnalysis("CantInsertRuntimeCheckWithConvergent")
+ << "cannot add control dependency to convergent operation";
+ LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
+ "would be needed with a convergent operation\n");
+ CanVecMem = false;
+ return;
+ }
+
if (CanVecMem)
LLVM_DEBUG(
dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
@@ -2252,7 +2292,7 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
// Match the types so we can compare the stride and the BETakenCount.
// The Stride can be positive/negative, so we sign extend Stride;
- // The backdgeTakenCount is non-negative, so we zero extend BETakenCount.
+ // The backedgeTakenCount is non-negative, so we zero extend BETakenCount.
const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
@@ -2287,6 +2327,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
+ HasConvergentOp(false),
HasDependenceInvolvingLoopInvariantAddress(false) {
if (canAnalyzeLoop())
analyzeLoop(AA, LI, TLI, DT);
@@ -2303,6 +2344,9 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
OS << "\n";
}
+ if (HasConvergentOp)
+ OS.indent(Depth) << "Has convergent operation in loop\n";
+
if (Report)
OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
diff --git a/lib/Analysis/LoopAnalysisManager.cpp b/lib/Analysis/LoopAnalysisManager.cpp
index 2a3b29d7fbca..a10a87ce113b 100644
--- a/lib/Analysis/LoopAnalysisManager.cpp
+++ b/lib/Analysis/LoopAnalysisManager.cpp
@@ -1,9 +1,8 @@
//===- LoopAnalysisManager.cpp - Loop analysis management -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -19,11 +18,6 @@
using namespace llvm;
namespace llvm {
-/// Enables memory ssa as a dependency for loop passes in legacy pass manager.
-cl::opt<bool> EnableMSSALoopDependency(
- "enable-mssa-loop-dependency", cl::Hidden, cl::init(false),
- cl::desc("Enable MemorySSA dependency for loop pass manager"));
-
// Explicit template instantiations and specialization definitions for core
// template typedefs.
template class AllAnalysesOn<Loop>;
@@ -147,8 +141,6 @@ PreservedAnalyses llvm::getLoopPassPreservedAnalyses() {
PA.preserve<LoopAnalysis>();
PA.preserve<LoopAnalysisManagerFunctionProxy>();
PA.preserve<ScalarEvolutionAnalysis>();
- if (EnableMSSALoopDependency)
- PA.preserve<MemorySSAAnalysis>();
// FIXME: What we really want to do here is preserve an AA category, but that
// concept doesn't exist yet.
PA.preserve<AAManager>();
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index ef2b1257015c..aa5da0859805 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -1,9 +1,8 @@
//===- LoopInfo.cpp - Natural Loop Calculator -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,8 +17,12 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfoImpl.h"
#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CFG.h"
@@ -65,15 +68,16 @@ bool Loop::hasLoopInvariantOperands(const Instruction *I) const {
return all_of(I->operands(), [this](Value *V) { return isLoopInvariant(V); });
}
-bool Loop::makeLoopInvariant(Value *V, bool &Changed,
- Instruction *InsertPt) const {
+bool Loop::makeLoopInvariant(Value *V, bool &Changed, Instruction *InsertPt,
+ MemorySSAUpdater *MSSAU) const {
if (Instruction *I = dyn_cast<Instruction>(V))
- return makeLoopInvariant(I, Changed, InsertPt);
+ return makeLoopInvariant(I, Changed, InsertPt, MSSAU);
return true; // All non-instructions are loop-invariant.
}
bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
- Instruction *InsertPt) const {
+ Instruction *InsertPt,
+ MemorySSAUpdater *MSSAU) const {
// Test if the value is already loop-invariant.
if (isLoopInvariant(I))
return true;
@@ -94,11 +98,14 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
}
// Don't hoist instructions with loop-variant operands.
for (Value *Operand : I->operands())
- if (!makeLoopInvariant(Operand, Changed, InsertPt))
+ if (!makeLoopInvariant(Operand, Changed, InsertPt, MSSAU))
return false;
// Hoist.
I->moveBefore(InsertPt);
+ if (MSSAU)
+ if (auto *MUD = MSSAU->getMemorySSA()->getMemoryAccess(I))
+ MSSAU->moveToPlace(MUD, InsertPt->getParent(), MemorySSA::End);
// There is a possibility of hoisting this instruction above some arbitrary
// condition. Any metadata defined on it can be control dependent on this
@@ -110,24 +117,37 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
return true;
}
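// [Editor's note] A minimal usage sketch of the new MemorySSAUpdater
// parameter threaded through makeLoopInvariant above; assumes an in-scope
// MemorySSA analysis for the function containing the loop.
#include "llvm/Analysis/MemorySSAUpdater.h"
static bool hoistIfInvariant(llvm::Loop *L, llvm::Instruction *I,
                             llvm::MemorySSA *MSSA) {
  llvm::MemorySSAUpdater MSSAU(MSSA);
  bool Changed = false;
  // Hoists I (and its operands) out of the loop when possible, keeping
  // MemorySSA in sync via the updater.
  return L->makeLoopInvariant(I, Changed, /*InsertPt=*/nullptr, &MSSAU);
}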
-PHINode *Loop::getCanonicalInductionVariable() const {
+bool Loop::getIncomingAndBackEdge(BasicBlock *&Incoming,
+ BasicBlock *&Backedge) const {
BasicBlock *H = getHeader();
- BasicBlock *Incoming = nullptr, *Backedge = nullptr;
+ Incoming = nullptr;
+ Backedge = nullptr;
pred_iterator PI = pred_begin(H);
assert(PI != pred_end(H) && "Loop must have at least one backedge!");
Backedge = *PI++;
if (PI == pred_end(H))
- return nullptr; // dead loop
+ return false; // dead loop
Incoming = *PI++;
if (PI != pred_end(H))
- return nullptr; // multiple backedges?
+ return false; // multiple backedges?
if (contains(Incoming)) {
if (contains(Backedge))
- return nullptr;
+ return false;
std::swap(Incoming, Backedge);
} else if (!contains(Backedge))
+ return false;
+
+ assert(Incoming && Backedge && "expected non-null incoming and backedges");
+ return true;
+}
+
+PHINode *Loop::getCanonicalInductionVariable() const {
+ BasicBlock *H = getHeader();
+
+ BasicBlock *Incoming = nullptr, *Backedge = nullptr;
+ if (!getIncomingAndBackEdge(Incoming, Backedge))
return nullptr;
// Loop over all of the PHI nodes, looking for a canonical indvar.
@@ -146,6 +166,218 @@ PHINode *Loop::getCanonicalInductionVariable() const {
return nullptr;
}
+/// Get the latch condition instruction.
+static ICmpInst *getLatchCmpInst(const Loop &L) {
+ if (BasicBlock *Latch = L.getLoopLatch())
+ if (BranchInst *BI = dyn_cast_or_null<BranchInst>(Latch->getTerminator()))
+ if (BI->isConditional())
+ return dyn_cast<ICmpInst>(BI->getCondition());
+
+ return nullptr;
+}
+
+/// Return the final value of the loop induction variable if found.
+static Value *findFinalIVValue(const Loop &L, const PHINode &IndVar,
+ const Instruction &StepInst) {
+ ICmpInst *LatchCmpInst = getLatchCmpInst(L);
+ if (!LatchCmpInst)
+ return nullptr;
+
+ Value *Op0 = LatchCmpInst->getOperand(0);
+ Value *Op1 = LatchCmpInst->getOperand(1);
+ if (Op0 == &IndVar || Op0 == &StepInst)
+ return Op1;
+
+ if (Op1 == &IndVar || Op1 == &StepInst)
+ return Op0;
+
+ return nullptr;
+}
+
+Optional<Loop::LoopBounds> Loop::LoopBounds::getBounds(const Loop &L,
+ PHINode &IndVar,
+ ScalarEvolution &SE) {
+ InductionDescriptor IndDesc;
+ if (!InductionDescriptor::isInductionPHI(&IndVar, &L, &SE, IndDesc))
+ return None;
+
+ Value *InitialIVValue = IndDesc.getStartValue();
+ Instruction *StepInst = IndDesc.getInductionBinOp();
+ if (!InitialIVValue || !StepInst)
+ return None;
+
+ const SCEV *Step = IndDesc.getStep();
+ Value *StepInstOp1 = StepInst->getOperand(1);
+ Value *StepInstOp0 = StepInst->getOperand(0);
+ Value *StepValue = nullptr;
+ if (SE.getSCEV(StepInstOp1) == Step)
+ StepValue = StepInstOp1;
+ else if (SE.getSCEV(StepInstOp0) == Step)
+ StepValue = StepInstOp0;
+
+ Value *FinalIVValue = findFinalIVValue(L, IndVar, *StepInst);
+ if (!FinalIVValue)
+ return None;
+
+ return LoopBounds(L, *InitialIVValue, *StepInst, StepValue, *FinalIVValue,
+ SE);
+}
+
+using Direction = Loop::LoopBounds::Direction;
+
+ICmpInst::Predicate Loop::LoopBounds::getCanonicalPredicate() const {
+ BasicBlock *Latch = L.getLoopLatch();
+ assert(Latch && "Expecting valid latch");
+
+ BranchInst *BI = dyn_cast_or_null<BranchInst>(Latch->getTerminator());
+ assert(BI && BI->isConditional() && "Expecting conditional latch branch");
+
+ ICmpInst *LatchCmpInst = dyn_cast<ICmpInst>(BI->getCondition());
+ assert(LatchCmpInst &&
+ "Expecting the latch compare instruction to be a CmpInst");
+
+ // Need to invert the predicate when the first successor is not the loop
+ // header.
+ ICmpInst::Predicate Pred = (BI->getSuccessor(0) == L.getHeader())
+ ? LatchCmpInst->getPredicate()
+ : LatchCmpInst->getInversePredicate();
+
+ if (LatchCmpInst->getOperand(0) == &getFinalIVValue())
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+
+ // Need to flip the strictness of the predicate when the latch compare
+ // instruction does not use StepInst.
+ if (LatchCmpInst->getOperand(0) == &getStepInst() ||
+ LatchCmpInst->getOperand(1) == &getStepInst())
+ return Pred;
+
+ // Cannot flip strictness of NE and EQ
+ if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
+ return ICmpInst::getFlippedStrictnessPredicate(Pred);
+
+ Direction D = getDirection();
+ if (D == Direction::Increasing)
+ return ICmpInst::ICMP_SLT;
+
+ if (D == Direction::Decreasing)
+ return ICmpInst::ICMP_SGT;
+
+ // If the direction cannot be determined, the canonical predicate cannot be
+ // found either.
+ return ICmpInst::BAD_ICMP_PREDICATE;
+}
+
+Direction Loop::LoopBounds::getDirection() const {
+ if (const SCEVAddRecExpr *StepAddRecExpr =
+ dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&getStepInst())))
+ if (const SCEV *StepRecur = StepAddRecExpr->getStepRecurrence(SE)) {
+ if (SE.isKnownPositive(StepRecur))
+ return Direction::Increasing;
+ if (SE.isKnownNegative(StepRecur))
+ return Direction::Decreasing;
+ }
+
+ return Direction::Unknown;
+}
+
+Optional<Loop::LoopBounds> Loop::getBounds(ScalarEvolution &SE) const {
+ if (PHINode *IndVar = getInductionVariable(SE))
+ return LoopBounds::getBounds(*this, *IndVar, SE);
+
+ return None;
+}
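// [Editor's note] A hedged usage sketch of the new Loop::getBounds API,
// using only names visible in the hunks above (getBounds, getDirection,
// LoopBounds::Direction).
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
static bool isIncreasingLoop(llvm::Loop &L, llvm::ScalarEvolution &SE) {
  if (llvm::Optional<llvm::Loop::LoopBounds> B = L.getBounds(SE))
    return B->getDirection() == llvm::Loop::LoopBounds::Direction::Increasing;
  return false; // no recognizable induction variable
}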
+
+PHINode *Loop::getInductionVariable(ScalarEvolution &SE) const {
+ if (!isLoopSimplifyForm())
+ return nullptr;
+
+ BasicBlock *Header = getHeader();
+ assert(Header && "Expected a valid loop header");
+ ICmpInst *CmpInst = getLatchCmpInst(*this);
+ if (!CmpInst)
+ return nullptr;
+
+ Instruction *LatchCmpOp0 = dyn_cast<Instruction>(CmpInst->getOperand(0));
+ Instruction *LatchCmpOp1 = dyn_cast<Instruction>(CmpInst->getOperand(1));
+
+ for (PHINode &IndVar : Header->phis()) {
+ InductionDescriptor IndDesc;
+ if (!InductionDescriptor::isInductionPHI(&IndVar, this, &SE, IndDesc))
+ continue;
+
+ Instruction *StepInst = IndDesc.getInductionBinOp();
+
+ // case 1:
+ // IndVar = phi[{InitialValue, preheader}, {StepInst, latch}]
+ // StepInst = IndVar + step
+ // cmp = StepInst < FinalValue
+ if (StepInst == LatchCmpOp0 || StepInst == LatchCmpOp1)
+ return &IndVar;
+
+ // case 2:
+ // IndVar = phi[{InitialValue, preheader}, {StepInst, latch}]
+ // StepInst = IndVar + step
+ // cmp = IndVar < FinalValue
+ if (&IndVar == LatchCmpOp0 || &IndVar == LatchCmpOp1)
+ return &IndVar;
+ }
+
+ return nullptr;
+}
+
+bool Loop::getInductionDescriptor(ScalarEvolution &SE,
+ InductionDescriptor &IndDesc) const {
+ if (PHINode *IndVar = getInductionVariable(SE))
+ return InductionDescriptor::isInductionPHI(IndVar, this, &SE, IndDesc);
+
+ return false;
+}
+
+bool Loop::isAuxiliaryInductionVariable(PHINode &AuxIndVar,
+ ScalarEvolution &SE) const {
+ // Located in the loop header
+ BasicBlock *Header = getHeader();
+ if (AuxIndVar.getParent() != Header)
+ return false;
+
+ // No uses outside of the loop
+ for (User *U : AuxIndVar.users())
+ if (const Instruction *I = dyn_cast<Instruction>(U))
+ if (!contains(I))
+ return false;
+
+ InductionDescriptor IndDesc;
+ if (!InductionDescriptor::isInductionPHI(&AuxIndVar, this, &SE, IndDesc))
+ return false;
+
+ // The step instruction opcode should be add or sub.
+ if (IndDesc.getInductionOpcode() != Instruction::Add &&
+ IndDesc.getInductionOpcode() != Instruction::Sub)
+ return false;
+
+ // Incremented by a loop invariant step for each loop iteration
+ return SE.isLoopInvariant(IndDesc.getStep(), this);
+}
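// [Editor's note] A hypothetical C analogue of an auxiliary induction
// variable accepted by the predicate above: j is not the loop guard, but it
// is a header phi with no uses outside the loop, stepped by a loop-invariant
// amount each iteration.
//
//   for (int i = 0, j = base; i < n; ++i, j += stride)
//     use(j);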
+
+bool Loop::isCanonical(ScalarEvolution &SE) const {
+ InductionDescriptor IndDesc;
+ if (!getInductionDescriptor(SE, IndDesc))
+ return false;
+
+ ConstantInt *Init = dyn_cast_or_null<ConstantInt>(IndDesc.getStartValue());
+ if (!Init || !Init->isZero())
+ return false;
+
+ if (IndDesc.getInductionOpcode() != Instruction::Add)
+ return false;
+
+ ConstantInt *Step = IndDesc.getConstIntStepValue();
+ if (!Step || !Step->isOne())
+ return false;
+
+ return true;
+}
+
// Check that 'BB' doesn't have any uses outside of the 'L'
static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB,
DominatorTree &DT) {
@@ -200,8 +432,11 @@ bool Loop::isLoopSimplifyForm() const {
bool Loop::isSafeToClone() const {
// Return false if any loop blocks contain indirectbrs, or there are any calls
// to noduplicate functions.
+ // FIXME: it should be ok to clone CallBrInst's if we correctly update the
+ // operand list to reflect the newly cloned labels.
for (BasicBlock *BB : this->blocks()) {
- if (isa<IndirectBrInst>(BB->getTerminator()))
+ if (isa<IndirectBrInst>(BB->getTerminator()) ||
+ isa<CallBrInst>(BB->getTerminator()))
return false;
for (Instruction &I : *BB)
@@ -242,48 +477,20 @@ void Loop::setLoopID(MDNode *LoopID) const {
assert((!LoopID || LoopID->getOperand(0) == LoopID) &&
"Loop ID should refer to itself");
- BasicBlock *H = getHeader();
- for (BasicBlock *BB : this->blocks()) {
- Instruction *TI = BB->getTerminator();
- for (BasicBlock *Successor : successors(TI)) {
- if (Successor == H) {
- TI->setMetadata(LLVMContext::MD_loop, LoopID);
- break;
- }
- }
- }
+ SmallVector<BasicBlock *, 4> LoopLatches;
+ getLoopLatches(LoopLatches);
+ for (BasicBlock *BB : LoopLatches)
+ BB->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
}
void Loop::setLoopAlreadyUnrolled() {
- MDNode *LoopID = getLoopID();
- // First remove any existing loop unrolling metadata.
- SmallVector<Metadata *, 4> MDs;
- // Reserve first location for self reference to the LoopID metadata node.
- MDs.push_back(nullptr);
-
- if (LoopID) {
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
- bool IsUnrollMetadata = false;
- MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
- if (MD) {
- const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
- IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
- }
- if (!IsUnrollMetadata)
- MDs.push_back(LoopID->getOperand(i));
- }
- }
-
- // Add unroll(disable) metadata to disable future unrolling.
LLVMContext &Context = getHeader()->getContext();
- SmallVector<Metadata *, 1> DisableOperands;
- DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
- MDNode *DisableNode = MDNode::get(Context, DisableOperands);
- MDs.push_back(DisableNode);
- MDNode *NewLoopID = MDNode::get(Context, MDs);
- // Set operand 0 to refer to the loop id itself.
- NewLoopID->replaceOperandWith(0, NewLoopID);
+ MDNode *DisableUnrollMD =
+ MDNode::get(Context, MDString::get(Context, "llvm.loop.unroll.disable"));
+ MDNode *LoopID = getLoopID();
+ MDNode *NewLoopID = makePostTransformationMetadata(
+ Context, LoopID, {"llvm.loop.unroll."}, {DisableUnrollMD});
setLoopID(NewLoopID);
}
@@ -761,6 +968,46 @@ bool llvm::isValidAsAccessGroup(MDNode *Node) {
return Node->getNumOperands() == 0 && Node->isDistinct();
}
+MDNode *llvm::makePostTransformationMetadata(LLVMContext &Context,
+ MDNode *OrigLoopID,
+ ArrayRef<StringRef> RemovePrefixes,
+ ArrayRef<MDNode *> AddAttrs) {
+ // First remove any existing loop metadata related to this transformation.
+ SmallVector<Metadata *, 4> MDs;
+
+ // Reserve first location for self reference to the LoopID metadata node.
+ TempMDTuple TempNode = MDNode::getTemporary(Context, None);
+ MDs.push_back(TempNode.get());
+
+ // Remove metadata for the transformation that has been applied or that became
+ // outdated.
+ if (OrigLoopID) {
+ for (unsigned i = 1, ie = OrigLoopID->getNumOperands(); i < ie; ++i) {
+ bool IsVectorMetadata = false;
+ Metadata *Op = OrigLoopID->getOperand(i);
+ if (MDNode *MD = dyn_cast<MDNode>(Op)) {
+ const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ if (S)
+ IsVectorMetadata =
+ llvm::any_of(RemovePrefixes, [S](StringRef Prefix) -> bool {
+ return S->getString().startswith(Prefix);
+ });
+ }
+ if (!IsVectorMetadata)
+ MDs.push_back(Op);
+ }
+ }
+
+ // Add metadata to avoid reapplying a transformation, such as
+ // llvm.loop.unroll.disable and llvm.loop.isvectorized.
+ MDs.append(AddAttrs.begin(), AddAttrs.end());
+
+ MDNode *NewLoopID = MDNode::getDistinct(Context, MDs);
+ // Replace the temporary node with a self-reference.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+ return NewLoopID;
+}
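// [Editor's note] A hedged usage sketch of makePostTransformationMetadata,
// modeled on setLoopAlreadyUnrolled above. The "llvm.loop.vectorize." prefix
// and "llvm.loop.isvectorized" marker are assumptions drawn from the comment
// in the function body, not confirmed by this diff.
static void markLoopVectorized(llvm::Loop &L) {
  llvm::LLVMContext &Ctx = L.getHeader()->getContext();
  llvm::MDNode *IsVectorized = llvm::MDNode::get(
      Ctx, llvm::MDString::get(Ctx, "llvm.loop.isvectorized"));
  llvm::MDNode *NewLoopID = llvm::makePostTransformationMetadata(
      Ctx, L.getLoopID(), {"llvm.loop.vectorize."}, {IsVectorized});
  L.setLoopID(NewLoopID);
}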
+
//===----------------------------------------------------------------------===//
// LoopInfo implementation
//
@@ -792,7 +1039,7 @@ void LoopInfoWrapperPass::verifyAnalysis() const {
void LoopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequiredTransitive<DominatorTreeWrapperPass>();
}
void LoopInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index a68f114b83a0..4ab3798039d8 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -1,9 +1,8 @@
//===- LoopPass.cpp - Loop Pass and Loop Pass Manager ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,6 +22,7 @@
#include "llvm/IR/PassTimingInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
+#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -210,6 +210,8 @@ bool LPPassManager::runOnFunction(Function &F) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
LoopPass *P = getContainedPass(Index);
+ llvm::TimeTraceScope LoopPassScope("RunLoopPass", P->getPassName());
+
dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG,
CurrentLoop->getHeader()->getName());
dumpRequiredSet(P);
@@ -384,16 +386,20 @@ void LoopPass::assignPassManager(PMStack &PMS,
LPPM->add(this);
}
+static std::string getDescription(const Loop &L) {
+ return "loop";
+}
+
bool LoopPass::skipLoop(const Loop *L) const {
const Function *F = L->getHeader()->getParent();
if (!F)
return false;
// Check the opt bisect limit.
- LLVMContext &Context = F->getContext();
- if (!Context.getOptPassGate().shouldRunPass(this, *L))
+ OptPassGate &Gate = F->getContext().getOptPassGate();
+ if (Gate.isEnabled() && !Gate.shouldRunPass(this, getDescription(*L)))
return true;
// Check for the OptimizeNone attribute.
- if (F->hasFnAttribute(Attribute::OptimizeNone)) {
+ if (F->hasOptNone()) {
// FIXME: Report this to dbgs() only once per function.
LLVM_DEBUG(dbgs() << "Skipping pass '" << getPassName() << "' in function "
<< F->getName() << "\n");
diff --git a/lib/Analysis/LoopUnrollAnalyzer.cpp b/lib/Analysis/LoopUnrollAnalyzer.cpp
index c8b91a7a1a51..1728b5e9f6d2 100644
--- a/lib/Analysis/LoopUnrollAnalyzer.cpp
+++ b/lib/Analysis/LoopUnrollAnalyzer.cpp
@@ -1,9 +1,8 @@
//===- LoopUnrollAnalyzer.cpp - Unrolling Effect Estimation -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index 907b321b231a..6e1bb50e8893 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -1,9 +1,8 @@
//===- MemDepPrinter.cpp - Printer for MemoryDependenceAnalysis -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/MemDerefPrinter.cpp b/lib/Analysis/MemDerefPrinter.cpp
index 4a136c5a0c6d..77ebf89d9a08 100644
--- a/lib/Analysis/MemDerefPrinter.cpp
+++ b/lib/Analysis/MemDerefPrinter.cpp
@@ -1,9 +1,8 @@
//===- MemDerefPrinter.cpp - Printer for isDereferenceablePointer ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -54,9 +53,10 @@ bool MemDerefPrinter::runOnFunction(Function &F) {
for (auto &I: instructions(F)) {
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
Value *PO = LI->getPointerOperand();
- if (isDereferenceablePointer(PO, DL))
+ if (isDereferenceablePointer(PO, LI->getType(), DL))
Deref.push_back(PO);
- if (isDereferenceableAndAlignedPointer(PO, LI->getAlignment(), DL))
+ if (isDereferenceableAndAlignedPointer(PO, LI->getType(),
+ LI->getAlignment(), DL))
DerefAndAligned.insert(PO);
}
}
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 686ad294378c..729dad463657 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -1,9 +1,8 @@
//===- MemoryBuiltins.cpp - Identify calls to memory builtins -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -264,6 +263,19 @@ bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
return getAllocationData(V, AllocLike, TLI, LookThroughBitCast).hasValue();
}
+/// Tests if a value is a call or invoke to a library function that
+/// reallocates memory (e.g., realloc).
+bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast).hasValue();
+}
+
+/// Tests if a function is a library function that reallocates memory
+/// (e.g., realloc).
+bool llvm::isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI) {
+ return getAllocationDataForFunction(F, ReallocLike, TLI).hasValue();
+}
+
/// extractMallocCall - Returns the corresponding CallInst if the instruction
/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
/// ignore InvokeInst here.
@@ -359,19 +371,8 @@ const CallInst *llvm::extractCallocCall(const Value *I,
return isCallocLikeFn(I, TLI) ? cast<CallInst>(I) : nullptr;
}
-/// isFreeCall - Returns non-null if the value is a call to the builtin free()
-const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
- bool IsNoBuiltinCall;
- const Function *Callee =
- getCalledFunction(I, /*LookThroughBitCast=*/false, IsNoBuiltinCall);
- if (Callee == nullptr || IsNoBuiltinCall)
- return nullptr;
-
- StringRef FnName = Callee->getName();
- LibFunc TLIFn;
- if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
- return nullptr;
-
+/// isLibFreeFunction - Returns true if the function is a builtin free()
+bool llvm::isLibFreeFunction(const Function *F, const LibFunc TLIFn) {
unsigned ExpectedNumParams;
if (TLIFn == LibFunc_free ||
TLIFn == LibFunc_ZdlPv || // operator delete(void*)
@@ -402,22 +403,39 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
TLIFn == LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t) // delete[](void*, align_val_t, nothrow)
ExpectedNumParams = 3;
else
- return nullptr;
+ return false;
// Check free prototype.
// FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
// attribute will exist.
- FunctionType *FTy = Callee->getFunctionType();
+ FunctionType *FTy = F->getFunctionType();
if (!FTy->getReturnType()->isVoidTy())
- return nullptr;
+ return false;
if (FTy->getNumParams() != ExpectedNumParams)
+ return false;
+ if (FTy->getParamType(0) != Type::getInt8PtrTy(F->getContext()))
+ return false;
+
+ return true;
+}
+
+/// isFreeCall - Returns non-null if the value is a call to the builtin free()
+const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
+ bool IsNoBuiltinCall;
+ const Function *Callee =
+ getCalledFunction(I, /*LookThroughBitCast=*/false, IsNoBuiltinCall);
+ if (Callee == nullptr || IsNoBuiltinCall)
return nullptr;
- if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext()))
+
+ StringRef FnName = Callee->getName();
+ LibFunc TLIFn;
+ if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
return nullptr;
- return dyn_cast<CallInst>(I);
+ return isLibFreeFunction(Callee, TLIFn) ? dyn_cast<CallInst>(I) : nullptr;
}
+
//===----------------------------------------------------------------------===//
// Utility functions to compute size of objects.
//
@@ -442,10 +460,10 @@ bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
return true;
}
-ConstantInt *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- bool MustSucceed) {
+Value *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ bool MustSucceed) {
assert(ObjectSize->getIntrinsicID() == Intrinsic::objectsize &&
"ObjectSize must be a call to llvm.objectsize!");
@@ -462,13 +480,35 @@ ConstantInt *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
EvalOptions.NullIsUnknownSize =
cast<ConstantInt>(ObjectSize->getArgOperand(2))->isOne();
- // FIXME: Does it make sense to just return a failure value if the size won't
- // fit in the output and `!MustSucceed`?
- uint64_t Size;
auto *ResultType = cast<IntegerType>(ObjectSize->getType());
- if (getObjectSize(ObjectSize->getArgOperand(0), Size, DL, TLI, EvalOptions) &&
- isUIntN(ResultType->getBitWidth(), Size))
- return ConstantInt::get(ResultType, Size);
+ bool StaticOnly = cast<ConstantInt>(ObjectSize->getArgOperand(3))->isZero();
+ if (StaticOnly) {
+ // FIXME: Does it make sense to just return a failure value if the size won't
+ // fit in the output and `!MustSucceed`?
+ uint64_t Size;
+ if (getObjectSize(ObjectSize->getArgOperand(0), Size, DL, TLI, EvalOptions) &&
+ isUIntN(ResultType->getBitWidth(), Size))
+ return ConstantInt::get(ResultType, Size);
+ } else {
+ LLVMContext &Ctx = ObjectSize->getFunction()->getContext();
+ ObjectSizeOffsetEvaluator Eval(DL, TLI, Ctx, EvalOptions);
+ SizeOffsetEvalType SizeOffsetPair =
+ Eval.compute(ObjectSize->getArgOperand(0));
+
+ if (SizeOffsetPair != ObjectSizeOffsetEvaluator::unknown()) {
+ IRBuilder<TargetFolder> Builder(Ctx, TargetFolder(DL));
+ Builder.SetInsertPoint(ObjectSize);
+
+ // If we're past the end of the object, then we can always access
+ // exactly 0 bytes.
+ Value *ResultSize =
+ Builder.CreateSub(SizeOffsetPair.first, SizeOffsetPair.second);
+ Value *UseZero =
+ Builder.CreateICmpULT(SizeOffsetPair.first, SizeOffsetPair.second);
+ return Builder.CreateSelect(UseZero, ConstantInt::get(ResultType, 0),
+ ResultSize);
+ }
+ }
if (!MustSucceed)
return nullptr;
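// [Editor's note] The dynamic lowering above materializes
// max(size - offset, 0) with a sub/icmp/select; a scalar analogue of the
// arithmetic, for reference:
static uint64_t bytesAccessible(uint64_t Size, uint64_t Offset) {
  // Past the end of the object, exactly 0 bytes are accessible; the select
  // on (Size < Offset) in the IR above guards the same underflow.
  return Offset <= Size ? Size - Offset : 0;
}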
@@ -684,7 +724,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){
if (!GV.hasDefinitiveInitializer())
return unknown();
- APInt Size(IntTyBits, DL.getTypeAllocSize(GV.getType()->getElementType()));
+ APInt Size(IntTyBits, DL.getTypeAllocSize(GV.getValueType()));
return std::make_pair(align(Size, GV.getAlignment()), Zero);
}
@@ -743,9 +783,12 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) {
ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(
const DataLayout &DL, const TargetLibraryInfo *TLI, LLVMContext &Context,
- bool RoundToAlign)
- : DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)),
- RoundToAlign(RoundToAlign) {
+ ObjectSizeOpts EvalOpts)
+ : DL(DL), TLI(TLI), Context(Context),
+ Builder(Context, TargetFolder(DL),
+ IRBuilderCallbackInserter(
+ [&](Instruction *I) { InsertedInstructions.insert(I); })),
+ EvalOpts(EvalOpts) {
// IntTy and Zero must be set for each compute() since the address space may
// be different for later objects.
}
@@ -767,17 +810,21 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
if (CacheIt != CacheMap.end() && anyKnown(CacheIt->second))
CacheMap.erase(CacheIt);
}
+
+ // Erase any instructions we inserted as part of the traversal.
+ for (Instruction *I : InsertedInstructions) {
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->eraseFromParent();
+ }
}
SeenVals.clear();
+ InsertedInstructions.clear();
return Result;
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
- ObjectSizeOpts ObjSizeOptions;
- ObjSizeOptions.RoundToAlign = RoundToAlign;
-
- ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, ObjSizeOptions);
+ ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, EvalOpts);
SizeOffsetType Const = Visitor.compute(V);
if (Visitor.bothKnown(Const))
return std::make_pair(ConstantInt::get(Context, Const.first),
@@ -916,24 +963,28 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) {
if (!bothKnown(EdgeData)) {
OffsetPHI->replaceAllUsesWith(UndefValue::get(IntTy));
OffsetPHI->eraseFromParent();
+ InsertedInstructions.erase(OffsetPHI);
SizePHI->replaceAllUsesWith(UndefValue::get(IntTy));
SizePHI->eraseFromParent();
+ InsertedInstructions.erase(SizePHI);
return unknown();
}
SizePHI->addIncoming(EdgeData.first, PHI.getIncomingBlock(i));
OffsetPHI->addIncoming(EdgeData.second, PHI.getIncomingBlock(i));
}
- Value *Size = SizePHI, *Offset = OffsetPHI, *Tmp;
- if ((Tmp = SizePHI->hasConstantValue())) {
+ Value *Size = SizePHI, *Offset = OffsetPHI;
+ if (Value *Tmp = SizePHI->hasConstantValue()) {
Size = Tmp;
SizePHI->replaceAllUsesWith(Size);
SizePHI->eraseFromParent();
+ InsertedInstructions.erase(SizePHI);
}
- if ((Tmp = OffsetPHI->hasConstantValue())) {
+ if (Value *Tmp = OffsetPHI->hasConstantValue()) {
Offset = Tmp;
OffsetPHI->replaceAllUsesWith(Offset);
OffsetPHI->eraseFromParent();
+ InsertedInstructions.erase(OffsetPHI);
}
return std::make_pair(Size, Offset);
}
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index e22182b99e11..b25b655165d7 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -1,9 +1,8 @@
//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -327,7 +326,8 @@ static bool isVolatile(Instruction *Inst) {
MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
- BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
+ BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
+ OrderedBasicBlock *OBB) {
MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
if (QueryInst != nullptr) {
if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
@@ -338,7 +338,7 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
}
}
MemDepResult SimpleDep = getSimplePointerDependencyFrom(
- MemLoc, isLoad, ScanIt, BB, QueryInst, Limit);
+ MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, OBB);
if (SimpleDep.isDef())
return SimpleDep;
// Non-local invariant group dependency indicates there is non local Def
@@ -439,14 +439,13 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
- BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
+ BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
+ OrderedBasicBlock *OBB) {
bool isInvariantLoad = false;
- if (!Limit) {
- unsigned DefaultLimit = BlockScanLimit;
- return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
- &DefaultLimit);
- }
+ unsigned DefaultLimit = BlockScanLimit;
+ if (!Limit)
+ Limit = &DefaultLimit;
// We must be careful with atomic accesses, as they may allow another thread
// to touch this location, clobbering it. We are conservative: if the
@@ -488,11 +487,14 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
const DataLayout &DL = BB->getModule()->getDataLayout();
- // Create a numbered basic block to lazily compute and cache instruction
+ // If the caller did not provide an ordered basic block,
+ // create one to lazily compute and cache instruction
// positions inside a BB. This is used to provide fast queries for relative
// position between two instructions in a BB and can be used by
// AliasAnalysis::callCapturesBefore.
- OrderedBasicBlock OBB(BB);
+ OrderedBasicBlock OBBTmp(BB);
+ if (!OBB)
+ OBB = &OBBTmp;
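// [Editor's note] A hedged sketch of why the caller-supplied
// OrderedBasicBlock matters: it lazily numbers instructions so relative
// intra-block position queries are cheap, and passing one in lets repeated
// dependency queries in the same block reuse the numbering. A and BInst are
// hypothetical instructions in the same block.
#include "llvm/Analysis/OrderedBasicBlock.h"
static bool comesBefore(llvm::OrderedBasicBlock &OBB,
                        const llvm::Instruction *A,
                        const llvm::Instruction *BInst) {
  return OBB.dominates(A, BInst); // true iff A precedes BInst in the block
}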
// Return "true" if and only if the instruction I is either a non-simple
// load or a non-simple store.
@@ -673,7 +675,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// A release fence requires that all stores complete before it, but does
// not prevent the reordering of following loads or stores 'before' the
// fence. As a result, we look past it when finding a dependency for
- // loads. DSE uses this to find preceeding stores to delete and thus we
+ // loads. DSE uses this to find preceding stores to delete and thus we
// can't bypass the fence if the query instruction is a store.
if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
@@ -683,7 +685,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
ModRefInfo MR = AA.getModRefInfo(Inst, MemLoc);
// If necessary, perform additional analysis.
if (isModAndRefSet(MR))
- MR = AA.callCapturesBefore(Inst, MemLoc, &DT, &OBB);
+ MR = AA.callCapturesBefore(Inst, MemLoc, &DT, OBB);
switch (clearMust(MR)) {
case ModRefInfo::NoModRef:
// If the call has no effect on the queried pointer, just ignore it.
@@ -709,7 +711,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
return MemDepResult::getNonFuncLocal();
}
-MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
+MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst,
+ OrderedBasicBlock *OBB) {
Instruction *ScanPos = QueryInst;
// Check for a cached result
@@ -747,8 +750,9 @@ MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
- LocalCache = getPointerDependencyFrom(
- MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst);
+ LocalCache =
+ getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(),
+ QueryParent, QueryInst, nullptr, OBB);
} else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
bool isReadOnly = AA.onlyReadsMemory(QueryCall);
LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
diff --git a/lib/Analysis/MemoryLocation.cpp b/lib/Analysis/MemoryLocation.cpp
index 27e8d72b8e89..163830eee797 100644
--- a/lib/Analysis/MemoryLocation.cpp
+++ b/lib/Analysis/MemoryLocation.cpp
@@ -1,9 +1,8 @@
//===- MemoryLocation.cpp - Memory location descriptions -------------------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp
index 6a5567ed765b..17f5d9b9f0ad 100644
--- a/lib/Analysis/MemorySSA.cpp
+++ b/lib/Analysis/MemorySSA.cpp
@@ -1,9 +1,8 @@
//===- MemorySSA.cpp - Memory SSA Builder ---------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -82,6 +81,11 @@ bool llvm::VerifyMemorySSA = true;
#else
bool llvm::VerifyMemorySSA = false;
#endif
+/// Enables MemorySSA as a dependency for loop passes in the legacy pass manager.
+cl::opt<bool> llvm::EnableMSSALoopDependency(
+ "enable-mssa-loop-dependency", cl::Hidden, cl::init(false),
+ cl::desc("Enable MemorySSA dependency for loop pass manager"));
+
static cl::opt<bool, true>
VerifyMemorySSAX("verify-memoryssa", cl::location(VerifyMemorySSA),
cl::Hidden, cl::desc("Enable verification of MemorySSA."));
@@ -252,10 +256,10 @@ struct ClobberAlias {
// Return a pair of {IsClobber (bool), AR (AliasResult)}. It relies on AR being
// ignored if IsClobber = false.
-static ClobberAlias instructionClobbersQuery(const MemoryDef *MD,
- const MemoryLocation &UseLoc,
- const Instruction *UseInst,
- AliasAnalysis &AA) {
+template <typename AliasAnalysisType>
+static ClobberAlias
+instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc,
+ const Instruction *UseInst, AliasAnalysisType &AA) {
Instruction *DefInst = MD->getMemoryInst();
assert(DefInst && "Defining instruction not actually an instruction");
const auto *UseCall = dyn_cast<CallBase>(UseInst);
@@ -300,10 +304,11 @@ static ClobberAlias instructionClobbersQuery(const MemoryDef *MD,
return {isModSet(I), AR};
}
+template <typename AliasAnalysisType>
static ClobberAlias instructionClobbersQuery(MemoryDef *MD,
const MemoryUseOrDef *MU,
const MemoryLocOrCall &UseMLOC,
- AliasAnalysis &AA) {
+ AliasAnalysisType &AA) {
// FIXME: This is a temporary hack to allow a single instructionClobbersQuery
// to exist while MemoryLocOrCall is pushed through places.
if (UseMLOC.IsCall)
@@ -346,12 +351,12 @@ struct UpwardsMemoryQuery {
} // end anonymous namespace
static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
- AliasAnalysis &AA) {
+ BatchAAResults &AA) {
Instruction *Inst = MD->getMemoryInst();
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
switch (II->getIntrinsicID()) {
case Intrinsic::lifetime_end:
- return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), Loc);
+ return AA.alias(MemoryLocation(II->getArgOperand(1)), Loc) == MustAlias;
default:
return false;
}
@@ -359,13 +364,14 @@ static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
return false;
}
-static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA,
+template <typename AliasAnalysisType>
+static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA,
const Instruction *I) {
// If the memory can't be changed, then loads of the memory can't be
// clobbered.
return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) ||
- AA.pointsToConstantMemory(cast<LoadInst>(I)->
- getPointerOperand()));
+ AA.pointsToConstantMemory(MemoryLocation(
+ cast<LoadInst>(I)->getPointerOperand())));
}
/// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing
@@ -381,10 +387,12 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA,
/// \param Query The UpwardsMemoryQuery we used for our search.
/// \param AA The AliasAnalysis we used for our search.
/// \param AllowImpreciseClobber Always false, unless we do relaxed verify.
-static void
+
+template <typename AliasAnalysisType>
+LLVM_ATTRIBUTE_UNUSED static void
checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt,
const MemoryLocation &StartLoc, const MemorySSA &MSSA,
- const UpwardsMemoryQuery &Query, AliasAnalysis &AA,
+ const UpwardsMemoryQuery &Query, AliasAnalysisType &AA,
bool AllowImpreciseClobber = false) {
assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?");
@@ -474,7 +482,7 @@ namespace {
/// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up
/// in one class.
-class ClobberWalker {
+template <class AliasAnalysisType> class ClobberWalker {
/// Save a few bytes by using unsigned instead of size_t.
using ListIndex = unsigned;
@@ -498,9 +506,10 @@ class ClobberWalker {
};
const MemorySSA &MSSA;
- AliasAnalysis &AA;
+ AliasAnalysisType &AA;
DominatorTree &DT;
UpwardsMemoryQuery *Query;
+ unsigned *UpwardWalkLimit;
// Phi optimization bookkeeping
SmallVector<DefPath, 32> Paths;
@@ -539,6 +548,16 @@ class ClobberWalker {
walkToPhiOrClobber(DefPath &Desc, const MemoryAccess *StopAt = nullptr,
const MemoryAccess *SkipStopAt = nullptr) const {
assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world");
+ assert(UpwardWalkLimit && "Need a valid walk limit");
+ bool LimitAlreadyReached = false;
+ // (*UpwardWalkLimit) may be 0 here, due to the loop in tryOptimizePhi. Set
+ // it to 1, which performs no alias() calls: the walk either returns in the
+ // first iteration of the loop below, or the limit is set back to 0 if all
+ // def chains are free of MemoryDefs.
+ if (!*UpwardWalkLimit) {
+ *UpwardWalkLimit = 1;
+ LimitAlreadyReached = true;
+ }
for (MemoryAccess *Current : def_chain(Desc.Last)) {
Desc.Last = Current;
@@ -548,6 +567,10 @@ class ClobberWalker {
if (auto *MD = dyn_cast<MemoryDef>(Current)) {
if (MSSA.isLiveOnEntryDef(MD))
return {MD, true, MustAlias};
+
+ if (!--*UpwardWalkLimit)
+ return {Current, true, MayAlias};
+
ClobberAlias CA =
instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA);
if (CA.IsClobber)
@@ -555,6 +578,9 @@ class ClobberWalker {
}
}
+ if (LimitAlreadyReached)
+ *UpwardWalkLimit = 0;
+
assert(isa<MemoryPhi>(Desc.Last) &&
"Ended at a non-clobber that's not a phi?");
return {Desc.Last, false, MayAlias};
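The limit handling above follows one pattern: a single budget is shared across every leg of a top-level query, each MemoryDef visited costs one unit before the alias check, and exhausting the budget returns a conservative MayAlias "clobber" instead of walking further. A minimal sketch of the same shape, detached from MemorySSA (Def and MayClobber are illustrative stand-ins, not names from this patch):

// Sketch: a budgeted upward scan over a def chain. The caller must pass
// Limit > 0; the decrement happens before the expensive query, so a
// budget of 1 stops immediately without issuing any alias() calls.
struct Def { Def *Prev; };
static Def *findClobberBounded(Def *Start, unsigned &Limit,
                               bool (*MayClobber)(Def *)) {
  for (Def *D = Start; D; D = D->Prev) {
    if (!--Limit)
      return D;          // Out of budget: conservative answer, stop here.
    if (MayClobber(D))   // The expensive alias query happens only here.
      return D;
  }
  return nullptr;        // Walked to live-on-entry without a clobber.
}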
@@ -626,10 +652,12 @@ class ClobberWalker {
SkipStopWhere = Query->OriginalAccess;
}
- UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere,
+ UpwardsWalkResult Res = walkToPhiOrClobber(Node,
+ /*StopAt=*/StopWhere,
/*SkipStopAt=*/SkipStopWhere);
if (Res.IsKnownClobber) {
assert(Res.Result != StopWhere && Res.Result != SkipStopWhere);
+
// If this wasn't a cache hit, we hit a clobber when walking. That's a
// failure.
TerminatedPath Term{Res.Result, PathIndex};
@@ -662,7 +690,7 @@ class ClobberWalker {
struct generic_def_path_iterator
: public iterator_facade_base<generic_def_path_iterator<T, Walker>,
std::forward_iterator_tag, T *> {
- generic_def_path_iterator() = default;
+ generic_def_path_iterator() {}
generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {}
T &operator*() const { return curNode(); }
@@ -887,13 +915,19 @@ class ClobberWalker {
}
public:
- ClobberWalker(const MemorySSA &MSSA, AliasAnalysis &AA, DominatorTree &DT)
+ ClobberWalker(const MemorySSA &MSSA, AliasAnalysisType &AA, DominatorTree &DT)
: MSSA(MSSA), AA(AA), DT(DT) {}
+ AliasAnalysisType *getAA() { return &AA; }
/// Finds the nearest clobber for the given query, optimizing phis if
/// possible.
- MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q) {
+ MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q,
+ unsigned &UpWalkLimit) {
Query = &Q;
+ UpwardWalkLimit = &UpWalkLimit;
+ // Starting limit must be > 0.
+ if (!UpWalkLimit)
+ UpWalkLimit++;
MemoryAccess *Current = Start;
// This walker pretends uses don't exist. If we're handed one, silently grab
@@ -918,13 +952,11 @@ public:
}
#ifdef EXPENSIVE_CHECKS
- if (!Q.SkipSelfAccess)
+ if (!Q.SkipSelfAccess && *UpwardWalkLimit > 0)
checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA);
#endif
return Result;
}
-
- void verify(const MemorySSA *MSSA) { assert(MSSA == &this->MSSA); }
};
struct RenamePassData {
@@ -947,77 +979,99 @@ struct RenamePassData {
namespace llvm {
-class MemorySSA::ClobberWalkerBase {
- ClobberWalker Walker;
+template <class AliasAnalysisType> class MemorySSA::ClobberWalkerBase {
+ ClobberWalker<AliasAnalysisType> Walker;
MemorySSA *MSSA;
public:
- ClobberWalkerBase(MemorySSA *M, AliasAnalysis *A, DominatorTree *D)
+ ClobberWalkerBase(MemorySSA *M, AliasAnalysisType *A, DominatorTree *D)
: Walker(*M, *A, *D), MSSA(M) {}
MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *,
- const MemoryLocation &);
- // Second argument (bool), defines whether the clobber search should skip the
+ const MemoryLocation &,
+ unsigned &);
+ // Third argument (bool) defines whether the clobber search should skip the
// original queried access. If true, there will be a follow-up query searching
// for a clobber access past "self". Note that the Optimized access is not
// updated if a new clobber is found by this SkipSelf search. If this
// additional query becomes heavily used we may decide to cache the result.
// Walker instantiations will decide how to set the SkipSelf bool.
- MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, bool);
- void verify(const MemorySSA *MSSA) { Walker.verify(MSSA); }
+ MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, unsigned &, bool);
};
/// A MemorySSAWalker that does AA walks to disambiguate accesses. It no
/// longer does caching on its own, but the name has been retained for the
/// moment.
+template <class AliasAnalysisType>
class MemorySSA::CachingWalker final : public MemorySSAWalker {
- ClobberWalkerBase *Walker;
+ ClobberWalkerBase<AliasAnalysisType> *Walker;
public:
- CachingWalker(MemorySSA *M, ClobberWalkerBase *W)
+ CachingWalker(MemorySSA *M, ClobberWalkerBase<AliasAnalysisType> *W)
: MemorySSAWalker(M), Walker(W) {}
~CachingWalker() override = default;
using MemorySSAWalker::getClobberingMemoryAccess;
- MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override;
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, unsigned &UWL) {
+ return Walker->getClobberingMemoryAccessBase(MA, UWL, false);
+ }
MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
- const MemoryLocation &Loc) override;
+ const MemoryLocation &Loc,
+ unsigned &UWL) {
+ return Walker->getClobberingMemoryAccessBase(MA, Loc, UWL);
+ }
+
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override {
+ unsigned UpwardWalkLimit = MaxCheckLimit;
+ return getClobberingMemoryAccess(MA, UpwardWalkLimit);
+ }
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
+ const MemoryLocation &Loc) override {
+ unsigned UpwardWalkLimit = MaxCheckLimit;
+ return getClobberingMemoryAccess(MA, Loc, UpwardWalkLimit);
+ }
void invalidateInfo(MemoryAccess *MA) override {
if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
MUD->resetOptimized();
}
-
- void verify(const MemorySSA *MSSA) override {
- MemorySSAWalker::verify(MSSA);
- Walker->verify(MSSA);
- }
};
+template <class AliasAnalysisType>
class MemorySSA::SkipSelfWalker final : public MemorySSAWalker {
- ClobberWalkerBase *Walker;
+ ClobberWalkerBase<AliasAnalysisType> *Walker;
public:
- SkipSelfWalker(MemorySSA *M, ClobberWalkerBase *W)
+ SkipSelfWalker(MemorySSA *M, ClobberWalkerBase<AliasAnalysisType> *W)
: MemorySSAWalker(M), Walker(W) {}
~SkipSelfWalker() override = default;
using MemorySSAWalker::getClobberingMemoryAccess;
- MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override;
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, unsigned &UWL) {
+ return Walker->getClobberingMemoryAccessBase(MA, UWL, true);
+ }
MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
- const MemoryLocation &Loc) override;
+ const MemoryLocation &Loc,
+ unsigned &UWL) {
+ return Walker->getClobberingMemoryAccessBase(MA, Loc, UWL);
+ }
+
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override {
+ unsigned UpwardWalkLimit = MaxCheckLimit;
+ return getClobberingMemoryAccess(MA, UpwardWalkLimit);
+ }
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
+ const MemoryLocation &Loc) override {
+ unsigned UpwardWalkLimit = MaxCheckLimit;
+ return getClobberingMemoryAccess(MA, Loc, UpwardWalkLimit);
+ }
void invalidateInfo(MemoryAccess *MA) override {
if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
MUD->resetOptimized();
}
-
- void verify(const MemorySSA *MSSA) override {
- MemorySSAWalker::verify(MSSA);
- Walker->verify(MSSA);
- }
};
} // end namespace llvm
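With the budget-taking overloads being non-virtual extras on the concrete walkers, external clients keep using the virtual MemorySSAWalker entry points, which seed a fresh budget from MaxCheckLimit on every call. A typical use, sketched under the assumption that an initialized MemorySSA &MSSA and a MemoryAccess *MA are in scope:

MemorySSAWalker *W = MSSA.getWalker();
MemoryAccess *Clobber = W->getClobberingMemoryAccess(MA);
if (MSSA.isLiveOnEntryDef(Clobber)) {
  // Nothing in the function clobbers MA's location (up to the walk limit).
}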
@@ -1071,6 +1125,8 @@ MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal,
void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal,
SmallPtrSetImpl<BasicBlock *> &Visited,
bool SkipVisited, bool RenameAllUses) {
+ assert(Root && "Trying to rename accesses in an unreachable block");
+
SmallVector<RenamePassData, 32> WorkStack;
// Skip everything if we already renamed this block and we are skipping.
// Note: You can't sink this into the if, because we need it to occur
@@ -1154,9 +1210,20 @@ void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) {
}
MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT)
- : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
+ : AA(nullptr), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
SkipWalker(nullptr), NextID(0) {
- buildMemorySSA();
+ // Build MemorySSA using a batch alias analysis. This reuses the internal
+ // state that AA collects during an alias()/getModRefInfo() call. This is
+ // safe because there are no CFG changes while building MemorySSA, and it can
+ // significantly reduce the time the compiler spends in AA, because we will
+ // make queries about all the instructions in the Function.
+ BatchAAResults BatchAA(*AA);
+ buildMemorySSA(BatchAA);
+ // Intentionally leave AA as nullptr while building so we don't accidentally
+ // use non-batch AliasAnalysis.
+ this->AA = AA;
+ // Also create the walker here.
+ getWalker();
}
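BatchAAResults is sound here precisely because nothing mutates the IR between queries during construction; the wrapper memoizes alias and mod-ref results across the whole build. The pattern in isolation (sketch; AA, LocA and LocB assumed in scope):

BatchAAResults BatchAA(AA);                     // Wraps an existing AAResults.
AliasResult First = BatchAA.alias(LocA, LocB);  // Computed, then cached.
AliasResult Again = BatchAA.alias(LocA, LocB);  // Served from the cache.
assert(First == Again && "Cached queries are stable while the IR is frozen");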
MemorySSA::~MemorySSA() {
@@ -1193,11 +1260,9 @@ namespace llvm {
/// which is walking bottom-up.
class MemorySSA::OptimizeUses {
public:
- OptimizeUses(MemorySSA *MSSA, MemorySSAWalker *Walker, AliasAnalysis *AA,
- DominatorTree *DT)
- : MSSA(MSSA), Walker(Walker), AA(AA), DT(DT) {
- Walker = MSSA->getWalker();
- }
+ OptimizeUses(MemorySSA *MSSA, CachingWalker<BatchAAResults> *Walker,
+ BatchAAResults *BAA, DominatorTree *DT)
+ : MSSA(MSSA), Walker(Walker), AA(BAA), DT(DT) {}
void optimizeUses();
@@ -1225,8 +1290,8 @@ private:
DenseMap<MemoryLocOrCall, MemlocStackInfo> &);
MemorySSA *MSSA;
- MemorySSAWalker *Walker;
- AliasAnalysis *AA;
+ CachingWalker<BatchAAResults> *Walker;
+ BatchAAResults *AA;
DominatorTree *DT;
};
@@ -1343,11 +1408,12 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
continue;
}
bool FoundClobberResult = false;
+ unsigned UpwardWalkLimit = MaxCheckLimit;
while (UpperBound > LocInfo.LowerBound) {
if (isa<MemoryPhi>(VersionStack[UpperBound])) {
// For phis, use the walker, see where we ended up, go there
- Instruction *UseInst = MU->getMemoryInst();
- MemoryAccess *Result = Walker->getClobberingMemoryAccess(UseInst);
+ MemoryAccess *Result =
+ Walker->getClobberingMemoryAccess(MU, UpwardWalkLimit);
// We are guaranteed to find it or something is wrong
while (VersionStack[UpperBound] != Result) {
assert(UpperBound != 0);
@@ -1423,7 +1489,7 @@ void MemorySSA::placePHINodes(
createMemoryPhi(BB);
}
-void MemorySSA::buildMemorySSA() {
+void MemorySSA::buildMemorySSA(BatchAAResults &BAA) {
// We create an access to represent "live on entry", for things like
// arguments or users of globals, where the memory they use is defined before
// the beginning of the function. We do not actually insert it into the IR.
@@ -1445,7 +1511,7 @@ void MemorySSA::buildMemorySSA() {
AccessList *Accesses = nullptr;
DefsList *Defs = nullptr;
for (Instruction &I : B) {
- MemoryUseOrDef *MUD = createNewAccess(&I);
+ MemoryUseOrDef *MUD = createNewAccess(&I, &BAA);
if (!MUD)
continue;
@@ -1469,9 +1535,9 @@ void MemorySSA::buildMemorySSA() {
SmallPtrSet<BasicBlock *, 16> Visited;
renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited);
- CachingWalker *Walker = getWalkerImpl();
-
- OptimizeUses(this, Walker, AA, DT).optimizeUses();
+ ClobberWalkerBase<BatchAAResults> WalkerBase(this, &BAA, DT);
+ CachingWalker<BatchAAResults> WalkerLocal(this, &WalkerBase);
+ OptimizeUses(this, &WalkerLocal, &BAA, DT).optimizeUses();
// Mark the uses in unreachable blocks as live on entry, so that they go
// somewhere.
@@ -1482,14 +1548,16 @@ void MemorySSA::buildMemorySSA() {
MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); }
-MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() {
+MemorySSA::CachingWalker<AliasAnalysis> *MemorySSA::getWalkerImpl() {
if (Walker)
return Walker.get();
if (!WalkerBase)
- WalkerBase = llvm::make_unique<ClobberWalkerBase>(this, AA, DT);
+ WalkerBase =
+ llvm::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT);
- Walker = llvm::make_unique<CachingWalker>(this, WalkerBase.get());
+ Walker =
+ llvm::make_unique<CachingWalker<AliasAnalysis>>(this, WalkerBase.get());
return Walker.get();
}
@@ -1498,9 +1566,11 @@ MemorySSAWalker *MemorySSA::getSkipSelfWalker() {
return SkipWalker.get();
if (!WalkerBase)
- WalkerBase = llvm::make_unique<ClobberWalkerBase>(this, AA, DT);
+ WalkerBase =
+ llvm::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT);
- SkipWalker = llvm::make_unique<SkipSelfWalker>(this, WalkerBase.get());
+ SkipWalker =
+ llvm::make_unique<SkipSelfWalker<AliasAnalysis>>(this, WalkerBase.get());
return SkipWalker.get();
}
@@ -1619,7 +1689,7 @@ MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
MemoryAccess *Definition,
const MemoryUseOrDef *Template) {
assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
- MemoryUseOrDef *NewAccess = createNewAccess(I, Template);
+ MemoryUseOrDef *NewAccess = createNewAccess(I, AA, Template);
assert(
NewAccess != nullptr &&
"Tried to create a memory access for a non-memory touching instruction");
@@ -1642,7 +1712,9 @@ static inline bool isOrdered(const Instruction *I) {
}
/// Helper function to create new memory accesses
+template <typename AliasAnalysisType>
MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
+ AliasAnalysisType *AAP,
const MemoryUseOrDef *Template) {
// The assume intrinsic has a control dependency which we model by claiming
// that it writes arbitrarily. Ignore that fake memory dependency here.
@@ -1657,7 +1729,7 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
Def = dyn_cast_or_null<MemoryDef>(Template) != nullptr;
Use = dyn_cast_or_null<MemoryUse>(Template) != nullptr;
#if !defined(NDEBUG)
- ModRefInfo ModRef = AA->getModRefInfo(I, None);
+ ModRefInfo ModRef = AAP->getModRefInfo(I, None);
bool DefCheck, UseCheck;
DefCheck = isModSet(ModRef) || isOrdered(I);
UseCheck = isRefSet(ModRef);
@@ -1665,7 +1737,7 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
#endif
} else {
// Find out what affect this instruction has on memory.
- ModRefInfo ModRef = AA->getModRefInfo(I, None);
+ ModRefInfo ModRef = AAP->getModRefInfo(I, None);
// The isOrdered check is used to ensure that volatiles end up as defs
// (atomics end up as ModRef right now anyway). Until we separate the
// ordering chain from the memory chain, this enables people to see at least
@@ -1718,7 +1790,7 @@ void MemorySSA::removeFromLookups(MemoryAccess *MA) {
MUD->setDefiningAccess(nullptr);
// Invalidate our walker's cache if necessary
if (!isa<MemoryUse>(MA))
- Walker->invalidateInfo(MA);
+ getWalker()->invalidateInfo(MA);
Value *MemoryInst;
if (const auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
@@ -1778,35 +1850,16 @@ void MemorySSA::verifyMemorySSA() const {
verifyDomination(F);
verifyOrdering(F);
verifyDominationNumbers(F);
- Walker->verify(this);
- verifyClobberSanity(F);
-}
-
-/// Check sanity of the clobbering instruction for access MA.
-void MemorySSA::checkClobberSanityAccess(const MemoryAccess *MA) const {
- if (const auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) {
- if (!MUD->isOptimized())
- return;
- auto *I = MUD->getMemoryInst();
- auto Loc = MemoryLocation::getOrNone(I);
- if (Loc == None)
- return;
- auto *Clobber = MUD->getOptimized();
- UpwardsMemoryQuery Q(I, MUD);
- checkClobberSanity(MUD, Clobber, *Loc, *this, Q, *AA, true);
- }
-}
-
-void MemorySSA::verifyClobberSanity(const Function &F) const {
-#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS)
- for (const BasicBlock &BB : F) {
- const AccessList *Accesses = getBlockAccesses(&BB);
- if (!Accesses)
- continue;
- for (const MemoryAccess &MA : *Accesses)
- checkClobberSanityAccess(&MA);
- }
-#endif
+ // Previously, the verification used to also verify that the clobberingAccess
+ // cached by MemorySSA is the same as the clobberingAccess found at a later
+ // query to AA. This does not hold true in general due to the current fragility
+ // of BasicAA, which has arbitrary caps on the things it analyzes before giving
+ // up. As a result, correct transformations will lead to BasicAA returning
+ // different Alias answers before and after the transformation. Invalidating
+ // MemorySSA is not an option, as BasicAA's results can be so unstable that, in
+ // the worst case, we'd need to rebuild MemorySSA from scratch after every
+ // transformation, which defeats the purpose of using it. For such an example,
+ // see test4 added in D51960.
}
/// Verify that all of the blocks we believe to have valid domination numbers
@@ -2162,6 +2215,15 @@ MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F,
return MemorySSAAnalysis::Result(llvm::make_unique<MemorySSA>(F, &AA, &DT));
}
+bool MemorySSAAnalysis::Result::invalidate(
+ Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ auto PAC = PA.getChecker<MemorySSAAnalysis>();
+ return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
+ Inv.invalidate<AAManager>(F, PA) ||
+ Inv.invalidate<DominatorTreeAnalysis>(F, PA);
+}
+
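In practice the hook above means a pass keeps MemorySSA alive only by preserving the analysis itself while also leaving AA and the dominator tree intact. A pass that maintains MemorySSA through MemorySSAUpdater would report that roughly as follows (sketch of the usual new-pass-manager idiom; MyPass is a hypothetical pass):

PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &AM) {
  // ... transform, keeping MemorySSA current via MemorySSAUpdater ...
  PreservedAnalyses PA;
  PA.preserve<MemorySSAAnalysis>();
  PA.preserve<DominatorTreeAnalysis>(); // invalidate() checks the DT too.
  return PA;
}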
PreservedAnalyses MemorySSAPrinterPass::run(Function &F,
FunctionAnalysisManager &AM) {
OS << "MemorySSA for function: " << F.getName() << "\n";
@@ -2210,8 +2272,11 @@ MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {}
/// the MemoryAccess that actually clobbers Loc.
///
/// \returns our clobbering memory access
-MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(
- MemoryAccess *StartingAccess, const MemoryLocation &Loc) {
+template <typename AliasAnalysisType>
+MemoryAccess *
+MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase(
+ MemoryAccess *StartingAccess, const MemoryLocation &Loc,
+ unsigned &UpwardWalkLimit) {
if (isa<MemoryPhi>(StartingAccess))
return StartingAccess;
@@ -2239,7 +2304,8 @@ MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(
? StartingUseOrDef->getDefiningAccess()
: StartingUseOrDef;
- MemoryAccess *Clobber = Walker.findClobber(DefiningAccess, Q);
+ MemoryAccess *Clobber =
+ Walker.findClobber(DefiningAccess, Q, UpwardWalkLimit);
LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
LLVM_DEBUG(dbgs() << *StartingUseOrDef << "\n");
LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
@@ -2247,9 +2313,10 @@ MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(
return Clobber;
}
+template <typename AliasAnalysisType>
MemoryAccess *
-MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA,
- bool SkipSelf) {
+MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase(
+ MemoryAccess *MA, unsigned &UpwardWalkLimit, bool SkipSelf) {
auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA);
// If this is a MemoryPhi, we can't do anything.
if (!StartingAccess)
@@ -2275,7 +2342,7 @@ MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA,
UpwardsMemoryQuery Q(I, StartingAccess);
- if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) {
+ if (isUseTriviallyOptimizableToLiveOnEntry(*Walker.getAA(), I)) {
MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef();
StartingAccess->setOptimized(LiveOnEntry);
StartingAccess->setOptimizedAccessType(None);
@@ -2295,7 +2362,7 @@ MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA,
return DefiningAccess;
}
- OptimizedAccess = Walker.findClobber(DefiningAccess, Q);
+ OptimizedAccess = Walker.findClobber(DefiningAccess, Q, UpwardWalkLimit);
StartingAccess->setOptimized(OptimizedAccess);
if (MSSA->isLiveOnEntryDef(OptimizedAccess))
StartingAccess->setOptimizedAccessType(None);
@@ -2311,10 +2378,10 @@ MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA,
MemoryAccess *Result;
if (SkipSelf && isa<MemoryPhi>(OptimizedAccess) &&
- isa<MemoryDef>(StartingAccess)) {
+ isa<MemoryDef>(StartingAccess) && UpwardWalkLimit) {
assert(isa<MemoryDef>(Q.OriginalAccess));
Q.SkipSelfAccess = true;
- Result = Walker.findClobber(OptimizedAccess, Q);
+ Result = Walker.findClobber(OptimizedAccess, Q, UpwardWalkLimit);
} else
Result = OptimizedAccess;
@@ -2325,28 +2392,6 @@ MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA,
}
MemoryAccess *
-MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
- return Walker->getClobberingMemoryAccessBase(MA, false);
-}
-
-MemoryAccess *
-MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA,
- const MemoryLocation &Loc) {
- return Walker->getClobberingMemoryAccessBase(MA, Loc);
-}
-
-MemoryAccess *
-MemorySSA::SkipSelfWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
- return Walker->getClobberingMemoryAccessBase(MA, true);
-}
-
-MemoryAccess *
-MemorySSA::SkipSelfWalker::getClobberingMemoryAccess(MemoryAccess *MA,
- const MemoryLocation &Loc) {
- return Walker->getClobberingMemoryAccessBase(MA, Loc);
-}
-
-MemoryAccess *
DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
if (auto *Use = dyn_cast<MemoryUseOrDef>(MA))
return Use->getDefiningAccess();
diff --git a/lib/Analysis/MemorySSAUpdater.cpp b/lib/Analysis/MemorySSAUpdater.cpp
index 6c817d203684..4c1feee7fd9a 100644
--- a/lib/Analysis/MemorySSAUpdater.cpp
+++ b/lib/Analysis/MemorySSAUpdater.cpp
@@ -1,9 +1,8 @@
//===-- MemorySSAUpdater.cpp - Memory SSA Updater--------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------===//
//
@@ -73,7 +72,10 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
// potential phi node. This will insert phi nodes if we cycle in order to
// break the cycle and have an operand.
for (auto *Pred : predecessors(BB))
- PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef));
+ if (MSSA->DT->isReachableFromEntry(Pred))
+ PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef));
+ else
+ PhiOps.push_back(MSSA->getLiveOnEntryDef());
// Now try to simplify the ops to avoid placing a phi.
// This may return null if we never created a phi yet, that's okay
@@ -157,8 +159,10 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefFromEnd(
DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &CachedPreviousDef) {
auto *Defs = MSSA->getWritableBlockDefs(BB);
- if (Defs)
+ if (Defs) {
+ CachedPreviousDef.insert({BB, &*Defs->rbegin()});
return &*Defs->rbegin();
+ }
return getPreviousDefRecursive(BB, CachedPreviousDef);
}
@@ -270,6 +274,8 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
// Also make sure we skip ourselves to avoid self references.
if (isa<MemoryUse>(U.getUser()) || U.getUser() == MD)
continue;
+ // Defs are automatically unoptimized when the user is set to MD below,
+ // because the isOptimized() call will fail to find the same ID.
U.set(MD);
}
}
@@ -277,6 +283,9 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
// and that def is now our defining access.
MD->setDefiningAccess(DefBefore);
+ // Remember the index where we may insert new phis below.
+ unsigned NewPhiIndex = InsertedPHIs.size();
+
SmallVector<WeakVH, 8> FixupList(InsertedPHIs.begin(), InsertedPHIs.end());
if (!DefBeforeSameBlock) {
// If there was a local def before us, we must have the same effect it
@@ -290,9 +299,56 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
// backwards to find the def. To make that work, we'd have to track whether
// getDefRecursive only ever used the single predecessor case. These types
// of paths also only exist in between CFG simplifications.
+
+ // If this is the first def in the block and this insert is in an arbitrary
+ // place, compute IDF and place phis.
+ auto Iter = MD->getDefsIterator();
+ ++Iter;
+ auto IterEnd = MSSA->getBlockDefs(MD->getBlock())->end();
+ if (Iter == IterEnd) {
+ ForwardIDFCalculator IDFs(*MSSA->DT);
+ SmallVector<BasicBlock *, 32> IDFBlocks;
+ SmallPtrSet<BasicBlock *, 2> DefiningBlocks;
+ DefiningBlocks.insert(MD->getBlock());
+ IDFs.setDefiningBlocks(DefiningBlocks);
+ IDFs.calculate(IDFBlocks);
+ SmallVector<AssertingVH<MemoryPhi>, 4> NewInsertedPHIs;
+ for (auto *BBIDF : IDFBlocks)
+ if (!MSSA->getMemoryAccess(BBIDF)) {
+ auto *MPhi = MSSA->createMemoryPhi(BBIDF);
+ NewInsertedPHIs.push_back(MPhi);
+ // Add the phis created into the IDF blocks to NonOptPhis, so they are
+ // not optimized out as trivial by the call to getPreviousDefFromEnd
+ // below. Once they are complete, all these Phis are added to the
+ // FixupList, and removed from NonOptPhis inside fixupDefs().
+ NonOptPhis.insert(MPhi);
+ }
+
+ for (auto &MPhi : NewInsertedPHIs) {
+ auto *BBIDF = MPhi->getBlock();
+ for (auto *Pred : predecessors(BBIDF)) {
+ DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef;
+ MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef),
+ Pred);
+ }
+ }
+
+ // Re-take the index where we're adding the new phis, because the above
+ // call to getPreviousDefFromEnd may have inserted into InsertedPHIs.
+ NewPhiIndex = InsertedPHIs.size();
+ for (auto &MPhi : NewInsertedPHIs) {
+ InsertedPHIs.push_back(&*MPhi);
+ FixupList.push_back(&*MPhi);
+ }
+ }
+
FixupList.push_back(MD);
}
+ // Remember the index where we stopped inserting new phis above, since the
+ // fixupDefs call in the loop below may insert more that are already minimal.
+ unsigned NewPhiIndexEnd = InsertedPHIs.size();
+
while (!FixupList.empty()) {
unsigned StartingPHISize = InsertedPHIs.size();
fixupDefs(FixupList);
@@ -300,6 +356,12 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
// Put any new phis on the fixup list, and process them
FixupList.append(InsertedPHIs.begin() + StartingPHISize, InsertedPHIs.end());
}
+
+ // Optimize potentially non-minimal phis added in this method.
+ unsigned NewPhiSize = NewPhiIndexEnd - NewPhiIndex;
+ if (NewPhiSize)
+ tryRemoveTrivialPhis(ArrayRef<WeakVH>(&InsertedPHIs[NewPhiIndex], NewPhiSize));
+
// Now that all fixups are done, rename all uses if we are asked.
if (RenameUses) {
SmallPtrSet<BasicBlock *, 16> Visited;
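The IDF step introduced in insertDef above is classic SSA phi placement: the blocks needing a new MemoryPhi are exactly the iterated dominance frontier of the blocks holding new definitions. Stripped of the surrounding bookkeeping, the calculator is driven like this (sketch mirroring the calls above; DT and DefBB assumed in scope):

ForwardIDFCalculator IDFs(DT);
SmallPtrSet<BasicBlock *, 2> DefiningBlocks;
DefiningBlocks.insert(DefBB);          // Block containing the new def.
IDFs.setDefiningBlocks(DefiningBlocks);
SmallVector<BasicBlock *, 32> IDFBlocks;
IDFs.calculate(IDFBlocks);             // Every block in here needs a phi.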
@@ -401,8 +463,8 @@ void MemorySSAUpdater::removeEdge(BasicBlock *From, BasicBlock *To) {
}
}
-void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(BasicBlock *From,
- BasicBlock *To) {
+void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(const BasicBlock *From,
+ const BasicBlock *To) {
if (MemoryPhi *MPhi = MSSA->getMemoryAccess(To)) {
bool Found = false;
MPhi->unorderedDeleteIncomingIf([&](const MemoryAccess *, BasicBlock *B) {
@@ -420,7 +482,8 @@ void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(BasicBlock *From,
void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
const ValueToValueMapTy &VMap,
- PhiToDefMap &MPhiMap) {
+ PhiToDefMap &MPhiMap,
+ bool CloneWasSimplified) {
auto GetNewDefiningAccess = [&](MemoryAccess *MA) -> MemoryAccess * {
MemoryAccess *InsnDefining = MA;
if (MemoryUseOrDef *DefMUD = dyn_cast<MemoryUseOrDef>(InsnDefining)) {
@@ -450,16 +513,60 @@ void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
// instructions. This occurs in LoopRotate when cloning instructions
// from the old header to the old preheader. The cloned instruction may
// also be a simplified Value, not an Instruction (see LoopRotate).
+ // Also in LoopRotate, even when it's an instruction, because it has been
+ // simplified it may be a Use rather than a Def, so we cannot use MUD as a
+ // template. Calls coming from updateForClonedBlockIntoPred ensure this.
if (Instruction *NewInsn =
dyn_cast_or_null<Instruction>(VMap.lookup(Insn))) {
MemoryAccess *NewUseOrDef = MSSA->createDefinedAccess(
- NewInsn, GetNewDefiningAccess(MUD->getDefiningAccess()), MUD);
+ NewInsn, GetNewDefiningAccess(MUD->getDefiningAccess()),
+ CloneWasSimplified ? nullptr : MUD);
MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End);
}
}
}
}
+void MemorySSAUpdater::updatePhisWhenInsertingUniqueBackedgeBlock(
+ BasicBlock *Header, BasicBlock *Preheader, BasicBlock *BEBlock) {
+ auto *MPhi = MSSA->getMemoryAccess(Header);
+ if (!MPhi)
+ return;
+
+ // Create phi node in the backedge block and populate it with the same
+ // incoming values as MPhi. Skip incoming values coming from Preheader.
+ auto *NewMPhi = MSSA->createMemoryPhi(BEBlock);
+ bool HasUniqueIncomingValue = true;
+ MemoryAccess *UniqueValue = nullptr;
+ for (unsigned I = 0, E = MPhi->getNumIncomingValues(); I != E; ++I) {
+ BasicBlock *IBB = MPhi->getIncomingBlock(I);
+ MemoryAccess *IV = MPhi->getIncomingValue(I);
+ if (IBB != Preheader) {
+ NewMPhi->addIncoming(IV, IBB);
+ if (HasUniqueIncomingValue) {
+ if (!UniqueValue)
+ UniqueValue = IV;
+ else if (UniqueValue != IV)
+ HasUniqueIncomingValue = false;
+ }
+ }
+ }
+
+ // Update incoming edges into MPhi. Remove all but the incoming edge from
+ // Preheader, and add an edge from NewMPhi.
+ auto *AccFromPreheader = MPhi->getIncomingValueForBlock(Preheader);
+ MPhi->setIncomingValue(0, AccFromPreheader);
+ MPhi->setIncomingBlock(0, Preheader);
+ for (unsigned I = MPhi->getNumIncomingValues() - 1; I >= 1; --I)
+ MPhi->unorderedDeleteIncoming(I);
+ MPhi->addIncoming(NewMPhi, BEBlock);
+
+ // If NewMPhi is a trivial phi, remove it. Its use in the header MPhi will be
+ // replaced with the unique value.
+ if (HasUniqueIncomingValue)
+ removeMemoryAccess(NewMPhi);
+}
+
void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
ArrayRef<BasicBlock *> ExitBlocks,
const ValueToValueMapTy &VMap,
@@ -543,10 +650,13 @@ void MemorySSAUpdater::updateForClonedBlockIntoPred(
// Defs from BB being used in BB will be replaced with the cloned defs from
// VM. The uses of BB's Phi (if it exists) in BB will be replaced by the
// incoming def into the Phi from P1.
+ // Instructions cloned into the predecessor are in practice sometimes
+ // simplified, so disable the use of the template, and create an access from
+ // scratch.
PhiToDefMap MPhiMap;
if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB))
MPhiMap[MPhi] = MPhi->getIncomingValueForBlock(P1);
- cloneUsesAndDefs(BB, P1, VM, MPhiMap);
+ cloneUsesAndDefs(BB, P1, VM, MPhiMap, /*CloneWasSimplified=*/true);
}
template <typename Iter>
@@ -599,7 +709,7 @@ void MemorySSAUpdater::applyUpdates(ArrayRef<CFGUpdate> Updates,
if (!RevDeleteUpdates.empty()) {
// Update for inserted edges: use newDT and snapshot CFG as if deletes had
- // not occured.
+ // not occurred.
// FIXME: This creates a new DT, so it's more expensive to do mix
// delete/inserts vs just inserts. We can do an incremental update on the DT
// to revert deletes, than re-delete the edges. Teaching DT to do this, is
@@ -697,7 +807,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
// Map a BB to its predecessors: added + previously existing. To get a
// deterministic order, store predecessors as SetVectors. The order in each
- // will be defined by teh order in Updates (fixed) and the order given by
+ // will be defined by the order in Updates (fixed) and the order given by
// children<> (also fixed). Since we further iterate over these ordered sets,
// we lose the information of multiple edges possibly existing between two
// blocks, so we'll keep an EdgeCount map for that.
@@ -756,15 +866,15 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
for (auto *BB : NewBlocks)
PredMap.erase(BB);
- SmallVector<BasicBlock *, 8> BlocksToProcess;
SmallVector<BasicBlock *, 16> BlocksWithDefsToReplace;
+ SmallVector<WeakVH, 8> InsertedPhis;
// First create MemoryPhis in all blocks that don't have one. Create in the
// order found in Updates, not in PredMap, to get deterministic numbering.
for (auto &Edge : Updates) {
BasicBlock *BB = Edge.getTo();
if (PredMap.count(BB) && !MSSA->getMemoryAccess(BB))
- MSSA->createMemoryPhi(BB);
+ InsertedPhis.push_back(MSSA->createMemoryPhi(BB));
}
// Now we'll fill in the MemoryPhis with the right incoming values.
@@ -831,10 +941,6 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
for (auto *Pred : PrevBlockSet)
for (int I = 0, E = EdgeCountMap[{Pred, BB}]; I < E; ++I)
NewPhi->addIncoming(DefP1, Pred);
-
- // Insert BB in the set of blocks that now have definition. We'll use this
- // to compute IDF and add Phis there next.
- BlocksToProcess.push_back(BB);
}
// Get all blocks that used to dominate BB and no longer do after adding
@@ -849,22 +955,41 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
GetNoLongerDomBlocks(PrevIDom, NewIDom, BlocksWithDefsToReplace);
}
+ tryRemoveTrivialPhis(InsertedPhis);
+ // Create the set of blocks that now have a definition. We'll use this to
+ // compute IDF and add Phis there next.
+ SmallVector<BasicBlock *, 8> BlocksToProcess;
+ for (auto &VH : InsertedPhis)
+ if (auto *MPhi = cast_or_null<MemoryPhi>(VH))
+ BlocksToProcess.push_back(MPhi->getBlock());
+
// Compute IDF and add Phis in all IDF blocks that do not have one.
SmallVector<BasicBlock *, 32> IDFBlocks;
if (!BlocksToProcess.empty()) {
- ForwardIDFCalculator IDFs(DT);
+ ForwardIDFCalculator IDFs(DT, GD);
SmallPtrSet<BasicBlock *, 16> DefiningBlocks(BlocksToProcess.begin(),
BlocksToProcess.end());
IDFs.setDefiningBlocks(DefiningBlocks);
IDFs.calculate(IDFBlocks);
+
+ SmallSetVector<MemoryPhi *, 4> PhisToFill;
+ // First create all needed Phis.
+ for (auto *BBIDF : IDFBlocks)
+ if (!MSSA->getMemoryAccess(BBIDF)) {
+ auto *IDFPhi = MSSA->createMemoryPhi(BBIDF);
+ InsertedPhis.push_back(IDFPhi);
+ PhisToFill.insert(IDFPhi);
+ }
+ // Then update or insert their correct incoming values.
for (auto *BBIDF : IDFBlocks) {
- if (auto *IDFPhi = MSSA->getMemoryAccess(BBIDF)) {
+ auto *IDFPhi = MSSA->getMemoryAccess(BBIDF);
+ assert(IDFPhi && "Phi must exist");
+ if (!PhisToFill.count(IDFPhi)) {
// Update existing Phi.
// FIXME: some updates may be redundant, try to optimize and skip some.
for (unsigned I = 0, E = IDFPhi->getNumIncomingValues(); I < E; ++I)
IDFPhi->setIncomingValue(I, GetLastDef(IDFPhi->getIncomingBlock(I)));
} else {
- IDFPhi = MSSA->createMemoryPhi(BBIDF);
for (auto &Pair : children<GraphDiffInvBBPair>({GD, BBIDF})) {
BasicBlock *Pi = Pair.second;
IDFPhi->addIncoming(GetLastDef(Pi), Pi);
@@ -907,6 +1032,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
}
}
}
+ tryRemoveTrivialPhis(InsertedPhis);
}
// Move What before Where in the MemorySSA IR.
@@ -1052,7 +1178,7 @@ void MemorySSAUpdater::wireOldPredecessorsToNewImmediatePredecessor(
}
}
-void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) {
+void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA, bool OptimizePhis) {
assert(!MSSA->isLiveOnEntryDef(MA) &&
"Trying to remove the live on entry def");
// We can only delete phi nodes if they have no uses, or we can replace all
@@ -1071,6 +1197,8 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) {
NewDefTarget = cast<MemoryUseOrDef>(MA)->getDefiningAccess();
}
+ SmallSetVector<MemoryPhi *, 4> PhisToCheck;
+
// Re-point the uses at our defining access
if (!isa<MemoryUse>(MA) && !MA->use_empty()) {
// Reset optimized on users of this store, and reset the uses.
@@ -1090,6 +1218,9 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) {
Use &U = *MA->use_begin();
if (auto *MUD = dyn_cast<MemoryUseOrDef>(U.getUser()))
MUD->resetOptimized();
+ if (OptimizePhis)
+ if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U.getUser()))
+ PhisToCheck.insert(MP);
U.set(NewDefTarget);
}
}
@@ -1098,10 +1229,25 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) {
// are doing things here
MSSA->removeFromLookups(MA);
MSSA->removeFromLists(MA);
+
+ // Optionally optimize Phi uses. This will recursively remove trivial phis.
+ if (!PhisToCheck.empty()) {
+ SmallVector<WeakVH, 16> PhisToOptimize{PhisToCheck.begin(),
+ PhisToCheck.end()};
+ PhisToCheck.clear();
+
+ unsigned PhisSize = PhisToOptimize.size();
+ while (PhisSize-- > 0)
+ if (MemoryPhi *MP =
+ cast_or_null<MemoryPhi>(PhisToOptimize.pop_back_val())) {
+ auto OperRange = MP->operands();
+ tryRemoveTrivialPhi(MP, OperRange);
+ }
+ }
}
void MemorySSAUpdater::removeBlocks(
- const SmallPtrSetImpl<BasicBlock *> &DeadBlocks) {
+ const SmallSetVector<BasicBlock *, 8> &DeadBlocks) {
// First delete all uses of BB in MemoryPhis.
for (BasicBlock *BB : DeadBlocks) {
Instruction *TI = BB->getTerminator();
@@ -1133,6 +1279,51 @@ void MemorySSAUpdater::removeBlocks(
}
}
+void MemorySSAUpdater::tryRemoveTrivialPhis(ArrayRef<WeakVH> UpdatedPHIs) {
+ for (auto &VH : UpdatedPHIs)
+ if (auto *MPhi = cast_or_null<MemoryPhi>(VH)) {
+ auto OperRange = MPhi->operands();
+ tryRemoveTrivialPhi(MPhi, OperRange);
+ }
+}
+
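tryRemoveTrivialPhi applies the standard SSA triviality rule to MemoryPhis; spelled out (illustration, not code from this patch):

// Phi = phi(V, V, ..., V)         -> replace Phi with V
// Phi = phi(V, Phi, ..., Phi, V)  -> self-references don't count; still V
// Phi = phi(V, W) with V != W     -> not trivial; the phi stays
// Removing one trivial phi can make a user phi trivial in turn, which is
// why removal recurses through the phi's uses.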
+void MemorySSAUpdater::changeToUnreachable(const Instruction *I) {
+ const BasicBlock *BB = I->getParent();
+ // Remove memory accesses in BB for I and all following instructions.
+ auto BBI = I->getIterator(), BBE = BB->end();
+ // FIXME: If this becomes too expensive, iterate until the first instruction
+ // with a memory access, then iterate over MemoryAccesses.
+ while (BBI != BBE)
+ removeMemoryAccess(&*(BBI++));
+ // Update phis in BB's successors to remove BB.
+ SmallVector<WeakVH, 16> UpdatedPHIs;
+ for (const BasicBlock *Successor : successors(BB)) {
+ removeDuplicatePhiEdgesBetween(BB, Successor);
+ if (MemoryPhi *MPhi = MSSA->getMemoryAccess(Successor)) {
+ MPhi->unorderedDeleteIncomingBlock(BB);
+ UpdatedPHIs.push_back(MPhi);
+ }
+ }
+ // Optimize trivial phis.
+ tryRemoveTrivialPhis(UpdatedPHIs);
+}
+
+void MemorySSAUpdater::changeCondBranchToUnconditionalTo(const BranchInst *BI,
+ const BasicBlock *To) {
+ const BasicBlock *BB = BI->getParent();
+ SmallVector<WeakVH, 16> UpdatedPHIs;
+ for (const BasicBlock *Succ : successors(BB)) {
+ removeDuplicatePhiEdgesBetween(BB, Succ);
+ if (Succ != To)
+ if (auto *MPhi = MSSA->getMemoryAccess(Succ)) {
+ MPhi->unorderedDeleteIncomingBlock(BB);
+ UpdatedPHIs.push_back(MPhi);
+ }
+ }
+ // Optimize trivial phis.
+ tryRemoveTrivialPhis(UpdatedPHIs);
+}
+
MemoryAccess *MemorySSAUpdater::createMemoryAccessInBB(
Instruction *I, MemoryAccess *Definition, const BasicBlock *BB,
MemorySSA::InsertionPlace Point) {
diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp
index 1e321f17d59f..519242759824 100644
--- a/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -1,9 +1,8 @@
//===-- ModuleDebugInfoPrinter.cpp - Prints module debug info metadata ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index 87f76d43bb1e..e25eb290a665 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -1,9 +1,8 @@
//===- ModuleSummaryAnalysis.cpp - Module summary index builder -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -71,6 +70,11 @@ cl::opt<FunctionSummary::ForceSummaryHotnessType, true> FSEC(
"all-non-critical", "All non-critical edges."),
clEnumValN(FunctionSummary::FSHT_All, "all", "All edges.")));
+cl::opt<std::string> ModuleSummaryDotFile(
+ "module-summary-dot-file", cl::init(""), cl::Hidden,
+ cl::value_desc("filename"),
+ cl::desc("File to emit dot graph of new summary into."));
+
// Walk through the operands of a given User via worklist iteration and populate
// the set of GlobalValue references encountered. Invoked either on an
// Instruction or a GlobalVariable (which walks its initializer).
@@ -227,6 +231,13 @@ static bool isNonVolatileLoad(const Instruction *I) {
return false;
}
+static bool isNonVolatileStore(const Instruction *I) {
+ if (const auto *SI = dyn_cast<StoreInst>(I))
+ return !SI->isVolatile();
+
+ return false;
+}
+
static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
const Function &F, BlockFrequencyInfo *BFI,
ProfileSummaryInfo *PSI, DominatorTree &DT,
@@ -241,7 +252,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
// Map from callee ValueId to profile count. Used to accumulate profile
// counts for all static calls to a given callee.
MapVector<ValueInfo, CalleeInfo> CallGraphEdges;
- SetVector<ValueInfo> RefEdges;
+ SetVector<ValueInfo> RefEdges, LoadRefEdges, StoreRefEdges;
SetVector<GlobalValue::GUID> TypeTests;
SetVector<FunctionSummary::VFuncId> TypeTestAssumeVCalls,
TypeCheckedLoadVCalls;
@@ -254,6 +265,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
// list.
findRefEdges(Index, &F, RefEdges, Visited);
std::vector<const Instruction *> NonVolatileLoads;
+ std::vector<const Instruction *> NonVolatileStores;
bool HasInlineAsmMaybeReferencingInternal = false;
for (const BasicBlock &BB : F)
@@ -261,12 +273,34 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
if (isa<DbgInfoIntrinsic>(I))
continue;
++NumInsts;
- if (isNonVolatileLoad(&I)) {
- // Postpone processing of non-volatile load instructions
- // See comments below
- Visited.insert(&I);
- NonVolatileLoads.push_back(&I);
- continue;
+ // A regular LTO module doesn't participate in ThinLTO importing,
+ // so no reference from it can be read- or write-only, since that
+ // would require importing the variable as a local copy.
+ if (IsThinLTO) {
+ if (isNonVolatileLoad(&I)) {
+ // Postpone processing of non-volatile load instructions
+ // See comments below
+ Visited.insert(&I);
+ NonVolatileLoads.push_back(&I);
+ continue;
+ } else if (isNonVolatileStore(&I)) {
+ Visited.insert(&I);
+ NonVolatileStores.push_back(&I);
+ // All references from the second operand of a store (the destination
+ // address) can be considered write-only if they're not referenced by
+ // any non-store instruction. References from the first operand of a
+ // store (the stored value) can't be treated as either read- or
+ // write-only, so we add them to RefEdges as we do with all other
+ // instructions except non-volatile loads.
+ Value *Stored = I.getOperand(0);
+ if (auto *GV = dyn_cast<GlobalValue>(Stored))
+ // findRefEdges will try to examine GV operands, so instead
+ // of calling it we should add GV to RefEdges directly.
+ RefEdges.insert(Index.getOrInsertValueInfo(GV));
+ else if (auto *U = dyn_cast<User>(Stored))
+ findRefEdges(Index, U, RefEdges, Visited);
+ continue;
+ }
}
findRefEdges(Index, &I, RefEdges, Visited);
auto CS = ImmutableCallSite(&I);
@@ -357,24 +391,61 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
}
}
- // By now we processed all instructions in a function, except
- // non-volatile loads. All new refs we add in a loop below
- // are obviously constant. All constant refs are grouped in the
- // end of RefEdges vector, so we can use a single integer value
- // to identify them.
- unsigned RefCnt = RefEdges.size();
- for (const Instruction *I : NonVolatileLoads) {
- Visited.erase(I);
- findRefEdges(Index, I, RefEdges, Visited);
- }
- std::vector<ValueInfo> Refs = RefEdges.takeVector();
- // Regular LTO module doesn't participate in ThinLTO import,
- // so no reference from it can be readonly, since this would
- // require importing variable as local copy
- if (IsThinLTO)
- for (; RefCnt < Refs.size(); ++RefCnt)
+ std::vector<ValueInfo> Refs;
+ if (IsThinLTO) {
+ auto AddRefEdges = [&](const std::vector<const Instruction *> &Instrs,
+ SetVector<ValueInfo> &Edges,
+ SmallPtrSet<const User *, 8> &Cache) {
+ for (const auto *I : Instrs) {
+ Cache.erase(I);
+ findRefEdges(Index, I, Edges, Cache);
+ }
+ };
+
+ // By now we have processed all instructions in the function, except
+ // non-volatile loads and non-volatile value stores. Let's find the
+ // ref edges for both instruction sets.
+ AddRefEdges(NonVolatileLoads, LoadRefEdges, Visited);
+ // We can add some values to the Visited set when processing load
+ // instructions which are also used by stores in NonVolatileStores.
+ // For example this can happen if we have following code:
+ //
+ // store %Derived* @foo, %Derived** bitcast (%Base** @bar to %Derived**)
+ // %42 = load %Derived*, %Derived** bitcast (%Base** @bar to %Derived**)
+ //
+ // After processing loads we'll add bitcast to the Visited set, and if
+ // we use the same set while processing stores, we'll never see store
+ // to @bar and @bar will be mistakenly treated as readonly.
+ SmallPtrSet<const llvm::User *, 8> StoreCache;
+ AddRefEdges(NonVolatileStores, StoreRefEdges, StoreCache);
+
+ // If both a load and a store reference the same variable, we won't
+ // be able to optimize it. Add all such reference edges to the
+ // RefEdges set.
+ for (auto &VI : StoreRefEdges)
+ if (LoadRefEdges.remove(VI))
+ RefEdges.insert(VI);
+
+ unsigned RefCnt = RefEdges.size();
+ // All new reference edges inserted in the two loops below are either
+ // read- or write-only. They will be grouped at the end of the RefEdges
+ // vector, so we can use a single integer value to identify them.
+ for (auto &VI : LoadRefEdges)
+ RefEdges.insert(VI);
+
+ unsigned FirstWORef = RefEdges.size();
+ for (auto &VI : StoreRefEdges)
+ RefEdges.insert(VI);
+
+ Refs = RefEdges.takeVector();
+ for (; RefCnt < FirstWORef; ++RefCnt)
Refs[RefCnt].setReadOnly();
+ for (; RefCnt < Refs.size(); ++RefCnt)
+ Refs[RefCnt].setWriteOnly();
+ } else {
+ Refs = RefEdges.takeVector();
+ }
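The net effect is a fixed layout in Refs that the two marking loops rely on, sketched below (RefCnt0 denotes the value RefCnt held before the marking loops ran):

// Layout of Refs in the ThinLTO case:
//   [0, RefCnt0)              neither read-only nor write-only
//   [RefCnt0, FirstWORef)     read-only  (marked via setReadOnly())
//   [FirstWORef, Refs.size()) write-only (marked via setWriteOnly())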
// Explicitly add hot edges to enforce importing for designated GUIDs for
// sample PGO, to enable the same inlines as the profiled optimized binary.
for (auto &I : F.getImportGUIDs())
@@ -387,7 +458,8 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
bool NotEligibleForImport =
NonRenamableLocal || HasInlineAsmMaybeReferencingInternal;
GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
- /* Live = */ false, F.isDSOLocal());
+ /* Live = */ false, F.isDSOLocal(),
+ F.hasLinkOnceODRLinkage() && F.hasGlobalUnnamedAddr());
FunctionSummary::FFlags FunFlags{
F.hasFnAttribute(Attribute::ReadNone),
F.hasFnAttribute(Attribute::ReadOnly),
@@ -406,26 +478,134 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
Index.addGlobalValueSummary(F, std::move(FuncSummary));
}
+/// Find function pointers referenced within the given vtable initializer
+/// (or subset of an initializer) \p I. The starting offset of \p I within
+/// the vtable initializer is \p StartingOffset. Any discovered function
+/// pointers are added to \p VTableFuncs along with their cumulative offset
+/// within the initializer.
+static void findFuncPointers(const Constant *I, uint64_t StartingOffset,
+ const Module &M, ModuleSummaryIndex &Index,
+ VTableFuncList &VTableFuncs) {
+ // First check if this is a function pointer.
+ if (I->getType()->isPointerTy()) {
+ auto Fn = dyn_cast<Function>(I->stripPointerCasts());
+ // We can disregard __cxa_pure_virtual as a possible call target, as
+ // calls to pure virtuals are UB.
+ if (Fn && Fn->getName() != "__cxa_pure_virtual")
+ VTableFuncs.push_back({Index.getOrInsertValueInfo(Fn), StartingOffset});
+ return;
+ }
+
+ // Walk through the elements in the constant struct or array and recursively
+ // look for virtual function pointers.
+ const DataLayout &DL = M.getDataLayout();
+ if (auto *C = dyn_cast<ConstantStruct>(I)) {
+ StructType *STy = dyn_cast<StructType>(C->getType());
+ assert(STy);
+ const StructLayout *SL = DL.getStructLayout(C->getType());
+
+ for (StructType::element_iterator EB = STy->element_begin(), EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI) {
+ auto Offset = SL->getElementOffset(EI - EB);
+ unsigned Op = SL->getElementContainingOffset(Offset);
+ findFuncPointers(cast<Constant>(I->getOperand(Op)),
+ StartingOffset + Offset, M, Index, VTableFuncs);
+ }
+ } else if (auto *C = dyn_cast<ConstantArray>(I)) {
+ ArrayType *ATy = C->getType();
+ Type *EltTy = ATy->getElementType();
+ uint64_t EltSize = DL.getTypeAllocSize(EltTy);
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
+ findFuncPointers(cast<Constant>(I->getOperand(i)),
+ StartingOffset + i * EltSize, M, Index, VTableFuncs);
+ }
+ }
+}
+
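For a concrete feel of the recursion, consider a vtable initializer shaped { [3 x i8*] } on a target with 8-byte pointers (an illustrative layout, not taken from this patch):

// findFuncPointers(init, StartingOffset = 0):
//   ConstantStruct -> element 0 sits at struct offset 0
//     ConstantArray of i8*, EltSize = 8:
//       slot 0: 0 + 0*8 = 0
//       slot 1: 0 + 1*8 = 8
//       slot 2: 0 + 2*8 = 16
// Only slots whose stripPointerCasts() yields a Function (other than
// __cxa_pure_virtual) are recorded, with these cumulative offsets.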
+// Identify the function pointers referenced by vtable definition \p V.
+static void computeVTableFuncs(ModuleSummaryIndex &Index,
+ const GlobalVariable &V, const Module &M,
+ VTableFuncList &VTableFuncs) {
+ if (!V.isConstant())
+ return;
+
+ findFuncPointers(V.getInitializer(), /*StartingOffset=*/0, M, Index,
+ VTableFuncs);
+
+#ifndef NDEBUG
+ // Validate that the VTableFuncs list is ordered by offset.
+ uint64_t PrevOffset = 0;
+ for (auto &P : VTableFuncs) {
+ // The findFuncPointers traversal should have encountered the
+ // functions in offset order. We need to use ">=" since PrevOffset
+ // starts at 0.
+ assert(P.VTableOffset >= PrevOffset);
+ PrevOffset = P.VTableOffset;
+ }
+#endif
+}
+
+/// Record vtable definition \p V for each type metadata it references.
static void
-computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V,
- DenseSet<GlobalValue::GUID> &CantBePromoted) {
+recordTypeIdCompatibleVtableReferences(ModuleSummaryIndex &Index,
+ const GlobalVariable &V,
+ SmallVectorImpl<MDNode *> &Types) {
+ for (MDNode *Type : Types) {
+ auto TypeID = Type->getOperand(1).get();
+
+ uint64_t Offset =
+ cast<ConstantInt>(
+ cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+ ->getZExtValue();
+
+ if (auto *TypeId = dyn_cast<MDString>(TypeID))
+ Index.getOrInsertTypeIdCompatibleVtableSummary(TypeId->getString())
+ .push_back({Offset, Index.getOrInsertValueInfo(&V)});
+ }
+}
+
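The metadata decoded above is the standard !type annotation on vtable globals: operand 0 is the byte offset of an address point, operand 1 the type identifier. In IR it looks roughly like this (illustrative names):

// @_ZTV1A = constant { [3 x i8*] } { ... }, !type !0
// !0 = !{i64 16, !"_ZTS1A"}  ; address point at offset 16 for type "_ZTS1A"
//
// For that pair, the loop above records {16, ValueInfo(@_ZTV1A)} in the
// type-id-compatible-vtable summary keyed by "_ZTS1A".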
+static void computeVariableSummary(ModuleSummaryIndex &Index,
+ const GlobalVariable &V,
+ DenseSet<GlobalValue::GUID> &CantBePromoted,
+ const Module &M,
+ SmallVectorImpl<MDNode *> &Types) {
SetVector<ValueInfo> RefEdges;
SmallPtrSet<const User *, 8> Visited;
bool HasBlockAddress = findRefEdges(Index, &V, RefEdges, Visited);
bool NonRenamableLocal = isNonRenamableLocal(V);
GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal,
- /* Live = */ false, V.isDSOLocal());
+ /* Live = */ false, V.isDSOLocal(),
+ V.hasLinkOnceODRLinkage() && V.hasGlobalUnnamedAddr());
+
+ VTableFuncList VTableFuncs;
+ // If splitting is not enabled, then we compute the summary information
+ // necessary for index-based whole program devirtualization.
+ if (!Index.enableSplitLTOUnit()) {
+ Types.clear();
+ V.getMetadata(LLVMContext::MD_type, Types);
+ if (!Types.empty()) {
+ // Identify the function pointers referenced by this vtable definition.
+ computeVTableFuncs(Index, V, M, VTableFuncs);
+
+ // Record this vtable definition for each type metadata it references.
+ recordTypeIdCompatibleVtableReferences(Index, V, Types);
+ }
+ }
- // Don't mark variables we won't be able to internalize as read-only.
- GlobalVarSummary::GVarFlags VarFlags(
+ // Don't mark variables we won't be able to internalize as read/write-only.
+ bool CanBeInternalized =
!V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() &&
- !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass());
+ !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass();
+ GlobalVarSummary::GVarFlags VarFlags(CanBeInternalized, CanBeInternalized);
auto GVarSummary = llvm::make_unique<GlobalVarSummary>(Flags, VarFlags,
RefEdges.takeVector());
if (NonRenamableLocal)
CantBePromoted.insert(V.getGUID());
if (HasBlockAddress)
GVarSummary->setNotEligibleToImport();
+ if (!VTableFuncs.empty())
+ GVarSummary->setVTableFuncs(VTableFuncs);
Index.addGlobalValueSummary(V, std::move(GVarSummary));
}
@@ -434,12 +614,15 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
DenseSet<GlobalValue::GUID> &CantBePromoted) {
bool NonRenamableLocal = isNonRenamableLocal(A);
GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal,
- /* Live = */ false, A.isDSOLocal());
+ /* Live = */ false, A.isDSOLocal(),
+ A.hasLinkOnceODRLinkage() && A.hasGlobalUnnamedAddr());
auto AS = llvm::make_unique<AliasSummary>(Flags);
auto *Aliasee = A.getBaseObject();
- auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee);
- assert(AliaseeSummary && "Alias expects aliasee summary to be parsed");
- AS->setAliasee(AliaseeSummary);
+ auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID());
+ assert(AliaseeVI && "Alias expects aliasee summary to be available");
+ assert(AliaseeVI.getSummaryList().size() == 1 &&
+ "Expected a single entry per aliasee in per-module index");
+ AS->setAliasee(AliaseeVI, AliaseeVI.getSummaryList()[0].get());
if (NonRenamableLocal)
CantBePromoted.insert(A.getGUID());
Index.addGlobalValueSummary(A, std::move(AS));
@@ -507,7 +690,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage,
/* NotEligibleToImport = */ true,
/* Live = */ true,
- /* Local */ GV->isDSOLocal());
+ /* Local */ GV->isDSOLocal(),
+ GV->hasLinkOnceODRLinkage() && GV->hasGlobalUnnamedAddr());
CantBePromoted.insert(GV->getGUID());
// Create the appropriate summary type.
if (Function *F = dyn_cast<Function>(GV)) {
@@ -531,7 +715,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
} else {
std::unique_ptr<GlobalVarSummary> Summary =
llvm::make_unique<GlobalVarSummary>(
- GVFlags, GlobalVarSummary::GVarFlags(),
+ GVFlags, GlobalVarSummary::GVarFlags(false, false),
ArrayRef<ValueInfo>{});
Index.addGlobalValueSummary(*GV, std::move(Summary));
}
@@ -568,10 +752,11 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
// Compute summaries for all variables defined in module, and save in the
// index.
+ SmallVector<MDNode *, 2> Types;
for (const GlobalVariable &G : M.globals()) {
if (G.isDeclaration())
continue;
- computeVariableSummary(Index, G, CantBePromoted);
+ computeVariableSummary(Index, G, CantBePromoted, M, Types);
}
// Compute summaries for all aliases defined in module, and save in the
@@ -626,6 +811,15 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
}
}
+ if (!ModuleSummaryDotFile.empty()) {
+ std::error_code EC;
+ raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::F_None);
+ if (EC)
+ report_fatal_error(Twine("Failed to open dot file ") +
+ ModuleSummaryDotFile + ": " + EC.message() + "\n");
+ Index.exportToDot(OSDot);
+ }
+
return Index;
}
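This makes the freshly built per-module index inspectable as a Graphviz graph; driven through opt it would look something like the line below (the exact set of accompanying flags depends on the workflow):

opt -module-summary -module-summary-dot-file=index.dot -o out.bc in.bc

The emitted index.dot can then be rendered with a standard Graphviz tool such as dot.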
diff --git a/lib/Analysis/MustExecute.cpp b/lib/Analysis/MustExecute.cpp
index 180c38ddacc2..b616cd6f762b 100644
--- a/lib/Analysis/MustExecute.cpp
+++ b/lib/Analysis/MustExecute.cpp
@@ -1,9 +1,8 @@
//===- MustExecute.cpp - Printer for isGuaranteedToExecute ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -194,7 +193,8 @@ bool LoopSafetyInfo::allLoopPathsLeadToBlock(const Loop *CurLoop,
SmallPtrSet<const BasicBlock *, 4> Predecessors;
collectTransitivePredecessors(CurLoop, BB, Predecessors);
- // Make sure that all successors of all predecessors of BB are either:
+ // Make sure that all successors of all predecessors of BB that are not
+ // dominated by BB are either:
// 1) BB,
// 2) Also predecessors of BB,
// 3) Exit blocks which are not taken on 1st iteration.
@@ -204,6 +204,12 @@ bool LoopSafetyInfo::allLoopPathsLeadToBlock(const Loop *CurLoop,
// Predecessor block may throw, so it has a side exit.
if (blockMayThrow(Pred))
return false;
+
+ // BB dominates Pred, so if Pred executes, BB must have executed before it.
+ // This is the case, for example, when Pred is a loop latch.
+ if (DT->dominates(BB, Pred))
+ continue;
+
for (auto *Succ : successors(Pred))
if (CheckedSuccessors.insert(Succ).second &&
Succ != BB && !Predecessors.count(Succ))
diff --git a/lib/Analysis/ObjCARCAliasAnalysis.cpp b/lib/Analysis/ObjCARCAliasAnalysis.cpp
index 95ae1a6e744f..811033e73147 100644
--- a/lib/Analysis/ObjCARCAliasAnalysis.cpp
+++ b/lib/Analysis/ObjCARCAliasAnalysis.cpp
@@ -1,9 +1,8 @@
//===- ObjCARCAliasAnalysis.cpp - ObjC ARC Optimization -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -38,9 +37,10 @@ using namespace llvm;
using namespace llvm::objcarc;
AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+ const MemoryLocation &LocB,
+ AAQueryInfo &AAQI) {
if (!EnableARCOpts)
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
// First, strip off no-ops, including ObjC-specific no-ops, and try making a
// precise alias query.
@@ -48,7 +48,7 @@ AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA,
const Value *SB = GetRCIdentityRoot(LocB.Ptr);
AliasResult Result =
AAResultBase::alias(MemoryLocation(SA, LocA.Size, LocA.AATags),
- MemoryLocation(SB, LocB.Size, LocB.AATags));
+ MemoryLocation(SB, LocB.Size, LocB.AATags), AAQI);
if (Result != MayAlias)
return Result;
@@ -57,7 +57,7 @@ AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA,
const Value *UA = GetUnderlyingObjCPtr(SA, DL);
const Value *UB = GetUnderlyingObjCPtr(SB, DL);
if (UA != SA || UB != SB) {
- Result = AAResultBase::alias(MemoryLocation(UA), MemoryLocation(UB));
+ Result = AAResultBase::alias(MemoryLocation(UA), MemoryLocation(UB), AAQI);
// We can't use MustAlias or PartialAlias results here because
// GetUnderlyingObjCPtr may return an offsetted pointer value.
if (Result == NoAlias)
@@ -70,22 +70,23 @@ AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA,
}
bool ObjCARCAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) {
+ AAQueryInfo &AAQI, bool OrLocal) {
if (!EnableARCOpts)
- return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
// First, strip off no-ops, including ObjC-specific no-ops, and try making
// a precise alias query.
const Value *S = GetRCIdentityRoot(Loc.Ptr);
if (AAResultBase::pointsToConstantMemory(
- MemoryLocation(S, Loc.Size, Loc.AATags), OrLocal))
+ MemoryLocation(S, Loc.Size, Loc.AATags), AAQI, OrLocal))
return true;
// If that failed, climb to the underlying object, including climbing through
// ObjC-specific no-ops, and try making an imprecise alias query.
const Value *U = GetUnderlyingObjCPtr(S, DL);
if (U != S)
- return AAResultBase::pointsToConstantMemory(MemoryLocation(U), OrLocal);
+ return AAResultBase::pointsToConstantMemory(MemoryLocation(U), AAQI,
+ OrLocal);
// If that failed, fail. We don't need to chain here, since that's covered
// by the earlier precise query.
@@ -107,9 +108,10 @@ FunctionModRefBehavior ObjCARCAAResult::getModRefBehavior(const Function *F) {
}
ModRefInfo ObjCARCAAResult::getModRefInfo(const CallBase *Call,
- const MemoryLocation &Loc) {
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
if (!EnableARCOpts)
- return AAResultBase::getModRefInfo(Call, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc, AAQI);
switch (GetBasicARCInstKind(Call)) {
case ARCInstKind::Retain:
@@ -128,7 +130,7 @@ ModRefInfo ObjCARCAAResult::getModRefInfo(const CallBase *Call,
break;
}
- return AAResultBase::getModRefInfo(Call, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc, AAQI);
}
ObjCARCAAResult ObjCARCAA::run(Function &F, FunctionAnalysisManager &AM) {
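These are the same mechanical AAQueryInfo-threading changes applied throughout the patch: every query now carries a per-query state object and must forward it when chaining to the base class. A minimal sketch of the resulting override shape (MyAAResult is hypothetical):

    #include "llvm/Analysis/AliasAnalysis.h"
    using namespace llvm;

    struct MyAAResult : AAResultBase<MyAAResult> {
      AliasResult alias(const MemoryLocation &LocA,
                        const MemoryLocation &LocB, AAQueryInfo &AAQI) {
        // Any chained or recursive query must forward AAQI so that the
        // shared per-query cache and recursion guards keep working.
        return AAResultBase::alias(LocA, LocB, AAQI);
      }
    };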
diff --git a/lib/Analysis/ObjCARCAnalysisUtils.cpp b/lib/Analysis/ObjCARCAnalysisUtils.cpp
index d6db6386c38b..56d1cb421225 100644
--- a/lib/Analysis/ObjCARCAnalysisUtils.cpp
+++ b/lib/Analysis/ObjCARCAnalysisUtils.cpp
@@ -1,9 +1,8 @@
//===- ObjCARCAnalysisUtils.cpp -------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/ObjCARCInstKind.cpp b/lib/Analysis/ObjCARCInstKind.cpp
index 31c432711834..0e96c6e975c9 100644
--- a/lib/Analysis/ObjCARCInstKind.cpp
+++ b/lib/Analysis/ObjCARCInstKind.cpp
@@ -1,9 +1,8 @@
//===- ARCInstKind.cpp - ObjC ARC Optimization ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -482,6 +481,41 @@ bool llvm::objcarc::IsNoopOnNull(ARCInstKind Class) {
llvm_unreachable("covered switch isn't covered?");
}
+/// Test if the given class represents instructions which do nothing if
+/// passed a global variable.
+bool llvm::objcarc::IsNoopOnGlobal(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::ClaimRV:
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ return true;
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
/// Test if the given class represents instructions which are always safe
/// to mark with the "tail" keyword.
bool llvm::objcarc::IsAlwaysTail(ARCInstKind Class) {
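A hypothetical caller-side sketch (the actual ARC-optimizer use is outside this diff): an operation classified as a no-op on globals can be dropped once its pointer argument is provably a global variable. Treating GetBasicARCInstKind as the right classifier here is an assumption for illustration.

    #include "llvm/Analysis/ObjCARCAnalysisUtils.h"
    #include "llvm/Analysis/ObjCARCInstKind.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;
    using namespace llvm::objcarc;

    bool isNoopARCCallOnGlobal(const CallInst *CI) {
      return IsNoopOnGlobal(GetBasicARCInstKind(CI)) &&
             isa<GlobalVariable>(CI->getArgOperand(0)->stripPointerCasts());
    }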
diff --git a/lib/Analysis/OptimizationRemarkEmitter.cpp b/lib/Analysis/OptimizationRemarkEmitter.cpp
index 8ece0a2a3ed3..72c40a0be232 100644
--- a/lib/Analysis/OptimizationRemarkEmitter.cpp
+++ b/lib/Analysis/OptimizationRemarkEmitter.cpp
@@ -1,9 +1,8 @@
//===- OptimizationRemarkEmitter.cpp - Optimization Diagnostic --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/OrderedBasicBlock.cpp b/lib/Analysis/OrderedBasicBlock.cpp
index 5f4fe0f7dda2..48f2a4020c66 100644
--- a/lib/Analysis/OrderedBasicBlock.cpp
+++ b/lib/Analysis/OrderedBasicBlock.cpp
@@ -1,9 +1,8 @@
//===- OrderedBasicBlock.cpp --------------------------------- -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -86,3 +85,27 @@ bool OrderedBasicBlock::dominates(const Instruction *A, const Instruction *B) {
return comesBefore(A, B);
}
+
+void OrderedBasicBlock::eraseInstruction(const Instruction *I) {
+ if (LastInstFound != BB->end() && I == &*LastInstFound) {
+ if (LastInstFound == BB->begin()) {
+ LastInstFound = BB->end();
+ NextInstPos = 0;
+ } else
+ LastInstFound--;
+ }
+
+ NumberedInsts.erase(I);
+}
+
+void OrderedBasicBlock::replaceInstruction(const Instruction *Old,
+ const Instruction *New) {
+ auto OI = NumberedInsts.find(Old);
+ if (OI == NumberedInsts.end())
+ return;
+
+ NumberedInsts.insert({New, OI->second});
+ if (LastInstFound != BB->end() && Old == &*LastInstFound)
+ LastInstFound = New->getIterator();
+ NumberedInsts.erase(Old);
+}
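A hedged usage sketch of the two new cache hooks: code that swaps one instruction for another in place can transfer the cached ordinal instead of discarding the whole block's numbering. The sketch assumes NewI ends up at OldI's position, which is what the ordinal transfer in replaceInstruction relies on.

    #include "llvm/Analysis/OrderedBasicBlock.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    void swapInstruction(OrderedBasicBlock &OBB, Instruction *OldI,
                         Instruction *NewI) {
      NewI->insertBefore(OldI);           // NewI takes OldI's position
      OldI->replaceAllUsesWith(NewI);
      OBB.replaceInstruction(OldI, NewI); // NewI inherits OldI's ordinal
      OldI->eraseFromParent();
    }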
diff --git a/lib/Analysis/OrderedInstructions.cpp b/lib/Analysis/OrderedInstructions.cpp
index 7b155208c02e..458c0a7de6c2 100644
--- a/lib/Analysis/OrderedInstructions.cpp
+++ b/lib/Analysis/OrderedInstructions.cpp
@@ -1,9 +1,8 @@
//===-- OrderedInstructions.cpp - Instruction dominance function ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index 858f08f6537a..7f77ab146c4c 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -1,9 +1,8 @@
//===- PHITransAddr.cpp - PHI Translation for Addresses -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/PhiValues.cpp b/lib/Analysis/PhiValues.cpp
index 729227c86697..49749bc44746 100644
--- a/lib/Analysis/PhiValues.cpp
+++ b/lib/Analysis/PhiValues.cpp
@@ -1,9 +1,8 @@
//===- PhiValues.cpp - Phi Value Analysis ---------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
index e6b660fe26d7..4afe22bd5342 100644
--- a/lib/Analysis/PostDominators.cpp
+++ b/lib/Analysis/PostDominators.cpp
@@ -1,9 +1,8 @@
//===- PostDominators.cpp - Post-Dominator Calculation --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp
index 1d70c75f2e1c..dce19d6d546e 100644
--- a/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/lib/Analysis/ProfileSummaryInfo.cpp
@@ -1,9 +1,8 @@
//===- ProfileSummaryInfo.cpp - Global profile summary information --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -61,10 +60,9 @@ static cl::opt<int> ProfileSummaryColdCount(
// Find the summary entry for a desired percentile of counts.
static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS,
uint64_t Percentile) {
- auto Compare = [](const ProfileSummaryEntry &Entry, uint64_t Percentile) {
+ auto It = partition_point(DS, [=](const ProfileSummaryEntry &Entry) {
return Entry.Cutoff < Percentile;
- };
- auto It = std::lower_bound(DS.begin(), DS.end(), Percentile, Compare);
+ });
// The required percentile has to be <= one of the percentiles in the
// detailed summary.
if (It == DS.end())
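The lower_bound-to-partition_point rewrite above is behavior-preserving: both return the first entry whose Cutoff is >= Percentile. A plain-STL illustration of the equivalence (types simplified):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct Entry { uint64_t Cutoff; };

    const Entry &entryForPercentile(const std::vector<Entry> &DS,
                                    uint64_t Percentile) {
      auto It = std::partition_point(DS.begin(), DS.end(),
          [=](const Entry &E) { return E.Cutoff < Percentile; });
      assert(It != DS.end() && "percentile must be <= some cutoff");
      return *It;
    }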
@@ -80,7 +78,14 @@ static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS,
bool ProfileSummaryInfo::computeSummary() {
if (Summary)
return true;
- auto *SummaryMD = M.getProfileSummary();
+ // First try to get the context-sensitive ProfileSummary.
+ auto *SummaryMD = M.getProfileSummary(/* IsCS */ true);
+ if (SummaryMD) {
+ Summary.reset(ProfileSummary::getFromMD(SummaryMD));
+ return true;
+ }
+ // This will actually return a PSK_Instr or PSK_Sample summary.
+ SummaryMD = M.getProfileSummary(/* IsCS */ false);
if (!SummaryMD)
return false;
Summary.reset(ProfileSummary::getFromMD(SummaryMD));
@@ -89,7 +94,8 @@ bool ProfileSummaryInfo::computeSummary() {
Optional<uint64_t>
ProfileSummaryInfo::getProfileCount(const Instruction *Inst,
- BlockFrequencyInfo *BFI) {
+ BlockFrequencyInfo *BFI,
+ bool AllowSynthetic) {
if (!Inst)
return None;
assert((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
@@ -105,7 +111,7 @@ ProfileSummaryInfo::getProfileCount(const Instruction *Inst,
return None;
}
if (BFI)
- return BFI->getBlockProfileCount(Inst->getParent());
+ return BFI->getBlockProfileCount(Inst->getParent(), AllowSynthetic);
return None;
}
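A hypothetical caller-side sketch of the new AllowSynthetic parameter, which lets counts derived from synthetic (statically inferred) entry counts through; the default remains false, so existing callers are unaffected:

    #include "llvm/Analysis/BlockFrequencyInfo.h"
    #include "llvm/Analysis/ProfileSummaryInfo.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    Optional<uint64_t> callCount(ProfileSummaryInfo &PSI,
                                 const Instruction *CallSite,
                                 BlockFrequencyInfo *BFI) {
      return PSI.getProfileCount(CallSite, BFI, /*AllowSynthetic=*/true);
    }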
diff --git a/lib/Analysis/PtrUseVisitor.cpp b/lib/Analysis/PtrUseVisitor.cpp
index 1fdaf4d55b59..9a834ba4866a 100644
--- a/lib/Analysis/PtrUseVisitor.cpp
+++ b/lib/Analysis/PtrUseVisitor.cpp
@@ -1,9 +1,8 @@
//===- PtrUseVisitor.cpp - InstVisitors over a pointers uses --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -35,5 +34,11 @@ bool detail::PtrUseVisitorBase::adjustOffsetForGEP(GetElementPtrInst &GEPI) {
if (!IsOffsetKnown)
return false;
- return GEPI.accumulateConstantOffset(DL, Offset);
+ APInt TmpOffset(DL.getIndexTypeSizeInBits(GEPI.getType()), 0);
+ if (GEPI.accumulateConstantOffset(DL, TmpOffset)) {
+ Offset += TmpOffset.sextOrTrunc(Offset.getBitWidth());
+ return true;
+ }
+
+ return false;
}
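The point of the fix above: Offset may be wider or narrower than the GEP's index type, while accumulateConstantOffset requires an APInt of exactly the index width. Accumulating into a correctly sized temporary and sign-extending avoids the width mismatch. A standalone restatement (illustrative, not the class member itself):

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    bool addGEPOffset(const DataLayout &DL, GetElementPtrInst &GEP,
                      APInt &Offset /* arbitrary width */) {
      APInt Tmp(DL.getIndexTypeSizeInBits(GEP.getType()), 0);
      if (!GEP.accumulateConstantOffset(DL, Tmp))
        return false;
      Offset += Tmp.sextOrTrunc(Offset.getBitWidth());
      return true;
    }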
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index 2bd611350f46..8ba38adfb0d2 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -1,9 +1,8 @@
//===- RegionInfo.cpp - SESE region detection analysis --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Detects single entry single exit regions in the control flow graph.
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index a101ff109199..6c0d17b45c62 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -1,9 +1,8 @@
//===- RegionPass.cpp - Region Pass and Region Pass Manager ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -279,12 +278,17 @@ Pass *RegionPass::createPrinterPass(raw_ostream &O,
return new PrintRegionPass(Banner, O);
}
+static std::string getDescription(const Region &R) {
+ return "region";
+}
+
bool RegionPass::skipRegion(Region &R) const {
Function &F = *R.getEntry()->getParent();
- if (!F.getContext().getOptPassGate().shouldRunPass(this, R))
+ OptPassGate &Gate = F.getContext().getOptPassGate();
+ if (Gate.isEnabled() && !Gate.shouldRunPass(this, getDescription(R)))
return true;
- if (F.hasFnAttribute(Attribute::OptimizeNone)) {
+ if (F.hasOptNone()) {
// Report this only once per function.
if (R.getEntry() == &F.getEntryBlock())
LLVM_DEBUG(dbgs() << "Skipping pass '" << getPassName()
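A sketch of the updated skip-pass idiom shared by the legacy pass managers in this patch: the gate is consulted only when enabled (e.g. under -opt-bisect-limit) and now receives a textual description of the IR unit rather than the unit itself:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/OptBisect.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    bool shouldSkip(const Pass *P, LLVMContext &Ctx, StringRef Desc) {
      OptPassGate &Gate = Ctx.getOptPassGate();
      return Gate.isEnabled() && !Gate.shouldRunPass(P, Desc);
    }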
diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp
index 5986b8c4e0c3..5bdcb31fbe99 100644
--- a/lib/Analysis/RegionPrinter.cpp
+++ b/lib/Analysis/RegionPrinter.cpp
@@ -1,9 +1,8 @@
//===- RegionPrinter.cpp - Print regions tree pass ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Print out the region tree of a function using dotty/graphviz.
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index e5134f2eeda9..bc2cfd6fcc42 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -1,9 +1,8 @@
//===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -203,15 +202,20 @@ static cl::opt<unsigned> MaxConstantEvolvingDepth(
cl::desc("Maximum depth of recursive constant evolving"), cl::init(32));
static cl::opt<unsigned>
- MaxExtDepth("scalar-evolution-max-ext-depth", cl::Hidden,
- cl::desc("Maximum depth of recursive SExt/ZExt"),
- cl::init(8));
+ MaxCastDepth("scalar-evolution-max-cast-depth", cl::Hidden,
+ cl::desc("Maximum depth of recursive SExt/ZExt/Trunc"),
+ cl::init(8));
static cl::opt<unsigned>
MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden,
cl::desc("Max coefficients in AddRec during evolving"),
cl::init(8));
+static cl::opt<unsigned>
+ HugeExprThreshold("scalar-evolution-huge-expr-threshold", cl::Hidden,
+ cl::desc("Size at which an expression is considered huge"),
+ cl::init(4096));
+
//===----------------------------------------------------------------------===//
// SCEV class definitions
//===----------------------------------------------------------------------===//
@@ -273,7 +277,9 @@ void SCEV::print(raw_ostream &OS) const {
case scAddExpr:
case scMulExpr:
case scUMaxExpr:
- case scSMaxExpr: {
+ case scSMaxExpr:
+ case scUMinExpr:
+ case scSMinExpr: {
const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
const char *OpStr = nullptr;
switch (NAry->getSCEVType()) {
@@ -281,6 +287,12 @@ void SCEV::print(raw_ostream &OS) const {
case scMulExpr: OpStr = " * "; break;
case scUMaxExpr: OpStr = " umax "; break;
case scSMaxExpr: OpStr = " smax "; break;
+ case scUMinExpr:
+ OpStr = " umin ";
+ break;
+ case scSMinExpr:
+ OpStr = " smin ";
+ break;
}
OS << "(";
for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
@@ -349,6 +361,8 @@ Type *SCEV::getType() const {
case scMulExpr:
case scUMaxExpr:
case scSMaxExpr:
+ case scUMinExpr:
+ case scSMinExpr:
return cast<SCEVNAryExpr>(this)->getType();
case scAddExpr:
return cast<SCEVAddExpr>(this)->getType();
@@ -393,7 +407,7 @@ bool SCEV::isNonConstantNegative() const {
}
SCEVCouldNotCompute::SCEVCouldNotCompute() :
- SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
+ SCEV(FoldingSetNodeIDRef(), scCouldNotCompute, 0) {}
bool SCEVCouldNotCompute::classof(const SCEV *S) {
return S->getSCEVType() == scCouldNotCompute;
@@ -422,7 +436,7 @@ ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
unsigned SCEVTy, const SCEV *op, Type *ty)
- : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
+ : SCEV(ID, SCEVTy, computeExpressionSize(op)), Op(op), Ty(ty) {}
SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
const SCEV *op, Type *ty)
@@ -713,7 +727,9 @@ static int CompareSCEVComplexity(
case scAddExpr:
case scMulExpr:
case scSMaxExpr:
- case scUMaxExpr: {
+ case scUMaxExpr:
+ case scSMinExpr:
+ case scUMinExpr: {
const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
@@ -795,11 +811,10 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
}
// Do the rough sort by complexity.
- std::stable_sort(Ops.begin(), Ops.end(),
- [&](const SCEV *LHS, const SCEV *RHS) {
- return CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
- LHS, RHS, DT) < 0;
- });
+ llvm::stable_sort(Ops, [&](const SCEV *LHS, const SCEV *RHS) {
+ return CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LHS, RHS, DT) <
+ 0;
+ });
// Now that we are sorted by complexity, group elements of the same
// complexity. Note that this is, at worst, N^2, but the vector is likely to
@@ -846,6 +861,17 @@ static inline int sizeOfSCEV(const SCEV *S) {
return F.Size;
}
+/// Returns true if the subtree of \p S contains at least HugeExprThreshold
+/// nodes.
+static bool isHugeExpression(const SCEV *S) {
+ return S->getExpressionSize() >= HugeExprThreshold;
+}
+
+/// Returns true if \p Ops contains a huge SCEV (see definition above).
+static bool hasHugeExpression(ArrayRef<const SCEV *> Ops) {
+ return any_of(Ops, isHugeExpression);
+}
+
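A hedged sketch of the guard these helpers enable (mirroring the getAddExpr/getMulExpr changes further down): once any operand's cached size crosses the threshold, folding is skipped in favor of plain node creation. The Expr type is a stand-in for SCEV, which caches its size at construction.

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/STLExtras.h"

    struct Expr { // hypothetical stand-in for SCEV
      unsigned Size;
      unsigned getExpressionSize() const { return Size; }
    };
    static const unsigned Threshold = 4096; // cf. HugeExprThreshold

    bool shouldSkipFolding(llvm::ArrayRef<const Expr *> Ops) {
      return llvm::any_of(Ops, [](const Expr *E) {
        return E->getExpressionSize() >= Threshold;
      });
    }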
namespace {
struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> {
@@ -913,6 +939,8 @@ public:
void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
+ void visitSMinExpr(const SCEVSMinExpr *Numerator) {}
+ void visitUMinExpr(const SCEVUMinExpr *Numerator) {}
void visitUnknown(const SCEVUnknown *Numerator) {}
void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}
@@ -1219,8 +1247,8 @@ const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
// SCEV Expression folder implementations
//===----------------------------------------------------------------------===//
-const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
- Type *Ty) {
+const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty,
+ unsigned Depth) {
assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
"This is not a truncating conversion!");
assert(isSCEVable(Ty) &&
@@ -1241,15 +1269,23 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
// trunc(trunc(x)) --> trunc(x)
if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
- return getTruncateExpr(ST->getOperand(), Ty);
+ return getTruncateExpr(ST->getOperand(), Ty, Depth + 1);
// trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
- return getTruncateOrSignExtend(SS->getOperand(), Ty);
+ return getTruncateOrSignExtend(SS->getOperand(), Ty, Depth + 1);
// trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
- return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
+ return getTruncateOrZeroExtend(SZ->getOperand(), Ty, Depth + 1);
+
+ if (Depth > MaxCastDepth) {
+ SCEV *S =
+ new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ addToLoopUseLists(S);
+ return S;
+ }
// trunc(x1 + ... + xN) --> trunc(x1) + ... + trunc(xN) and
// trunc(x1 * ... * xN) --> trunc(x1) * ... * trunc(xN),
@@ -1261,7 +1297,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
unsigned numTruncs = 0;
for (unsigned i = 0, e = CommOp->getNumOperands(); i != e && numTruncs < 2;
++i) {
- const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty);
+ const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty, Depth + 1);
if (!isa<SCEVCastExpr>(CommOp->getOperand(i)) && isa<SCEVTruncateExpr>(S))
numTruncs++;
Operands.push_back(S);
@@ -1285,7 +1321,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
SmallVector<const SCEV *, 4> Operands;
for (const SCEV *Op : AddRec->operands())
- Operands.push_back(getTruncateExpr(Op, Ty));
+ Operands.push_back(getTruncateExpr(Op, Ty, Depth + 1));
return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
}
@@ -1619,7 +1655,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
ID.AddPointer(Ty);
void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
- if (Depth > MaxExtDepth) {
+ if (Depth > MaxCastDepth) {
SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
@@ -1637,7 +1673,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
unsigned NewBits = getTypeSizeInBits(Ty);
if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
CR.zextOrTrunc(NewBits)))
- return getTruncateOrZeroExtend(X, Ty);
+ return getTruncateOrZeroExtend(X, Ty, Depth);
}
// If the input value is a chrec scev, and we can prove that the value
@@ -1679,9 +1715,9 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
// Check whether the backedge-taken count can be losslessly casted to
// the addrec's type. The count is always unsigned.
const SCEV *CastedMaxBECount =
- getTruncateOrZeroExtend(MaxBECount, Start->getType());
- const SCEV *RecastedMaxBECount =
- getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
+ getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth);
+ const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(
+ CastedMaxBECount, MaxBECount->getType(), Depth);
if (MaxBECount == RecastedMaxBECount) {
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no unsigned overflow.
@@ -1930,7 +1966,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
// Limit recursion depth.
- if (Depth > MaxExtDepth) {
+ if (Depth > MaxCastDepth) {
SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
@@ -1948,7 +1984,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
unsigned NewBits = getTypeSizeInBits(Ty);
if (CR.truncate(TruncBits).signExtend(NewBits).contains(
CR.sextOrTrunc(NewBits)))
- return getTruncateOrSignExtend(X, Ty);
+ return getTruncateOrSignExtend(X, Ty, Depth);
}
if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
@@ -2023,9 +2059,9 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
// Check whether the backedge-taken count can be losslessly casted to
// the addrec's type. The count is always unsigned.
const SCEV *CastedMaxBECount =
- getTruncateOrZeroExtend(MaxBECount, Start->getType());
- const SCEV *RecastedMaxBECount =
- getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
+ getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth);
+ const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(
+ CastedMaxBECount, MaxBECount->getType(), Depth);
if (MaxBECount == RecastedMaxBECount) {
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no signed overflow.
@@ -2295,7 +2331,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
// can't-overflow flags for the operation if possible.
static SCEV::NoWrapFlags
StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
- const SmallVectorImpl<const SCEV *> &Ops,
+ const ArrayRef<const SCEV *> Ops,
SCEV::NoWrapFlags Flags) {
using namespace std::placeholders;
@@ -2405,7 +2441,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
}
// Limit recursion calls depth.
- if (Depth > MaxArithDepth)
+ if (Depth > MaxArithDepth || hasHugeExpression(Ops))
return getOrCreateAddExpr(Ops, Flags);
// Okay, check to see if the same value occurs in the operand list more than
@@ -2743,7 +2779,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
}
const SCEV *
-ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops,
+ScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
ID.AddInteger(scAddExpr);
@@ -2765,7 +2801,7 @@ ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops,
}
const SCEV *
-ScalarEvolution::getOrCreateAddRecExpr(SmallVectorImpl<const SCEV *> &Ops,
+ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
const Loop *L, SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
ID.AddInteger(scAddRecExpr);
@@ -2788,7 +2824,7 @@ ScalarEvolution::getOrCreateAddRecExpr(SmallVectorImpl<const SCEV *> &Ops,
}
const SCEV *
-ScalarEvolution::getOrCreateMulExpr(SmallVectorImpl<const SCEV *> &Ops,
+ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
ID.AddInteger(scMulExpr);
@@ -2884,7 +2920,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);
// Limit recursion calls depth.
- if (Depth > MaxArithDepth)
+ if (Depth > MaxArithDepth || hasHugeExpression(Ops))
return getOrCreateMulExpr(Ops, Flags);
// If there are any constants, fold them together.
@@ -3057,7 +3093,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// Limit max number of arguments to avoid creation of unreasonably big
// SCEVAddRecs with very complex operands.
if (AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1 >
- MaxAddRecSize)
+ MaxAddRecSize || isHugeExpression(AddRec) ||
+ isHugeExpression(OtherAddRec))
continue;
bool Overflow = false;
@@ -3090,7 +3127,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
AddRecOps.push_back(getAddExpr(SumOps, SCEV::FlagAnyWrap, Depth + 1));
}
if (!Overflow) {
- const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
+ const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRecLoop,
SCEV::FlagAnyWrap);
if (Ops.size() == 2) return NewAddRec;
Ops[Idx] = NewAddRec;
@@ -3493,209 +3530,166 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
return getAddExpr(BaseExpr, TotalOffset, Wrap);
}
-const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
- const SCEV *RHS) {
- SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
- return getSMaxExpr(Ops);
+std::tuple<const SCEV *, FoldingSetNodeID, void *>
+ScalarEvolution::findExistingSCEVInCache(int SCEVType,
+ ArrayRef<const SCEV *> Ops) {
+ FoldingSetNodeID ID;
+ void *IP = nullptr;
+ ID.AddInteger(SCEVType);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ return std::tuple<const SCEV *, FoldingSetNodeID, void *>(
+ UniqueSCEVs.FindNodeOrInsertPos(ID, IP), std::move(ID), IP);
}
-const SCEV *
-ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
- assert(!Ops.empty() && "Cannot get empty smax!");
+const SCEV *ScalarEvolution::getMinMaxExpr(unsigned Kind,
+ SmallVectorImpl<const SCEV *> &Ops) {
+ assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
- "SCEVSMaxExpr operand types don't match!");
+ "Operand types don't match!");
#endif
+ bool IsSigned = Kind == scSMaxExpr || Kind == scSMinExpr;
+ bool IsMax = Kind == scSMaxExpr || Kind == scUMaxExpr;
+
// Sort by complexity, this groups all similar expression types together.
GroupByComplexity(Ops, &LI, DT);
+ // Check if we have created the same expression before.
+ if (const SCEV *S = std::get<0>(findExistingSCEVInCache(Kind, Ops))) {
+ return S;
+ }
+
// If there are any constants, fold them together.
unsigned Idx = 0;
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
++Idx;
assert(Idx < Ops.size());
+ auto FoldOp = [&](const APInt &LHS, const APInt &RHS) {
+ if (Kind == scSMaxExpr)
+ return APIntOps::smax(LHS, RHS);
+ else if (Kind == scSMinExpr)
+ return APIntOps::smin(LHS, RHS);
+ else if (Kind == scUMaxExpr)
+ return APIntOps::umax(LHS, RHS);
+ else if (Kind == scUMinExpr)
+ return APIntOps::umin(LHS, RHS);
+ llvm_unreachable("Unknown SCEV min/max opcode");
+ };
+
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
ConstantInt *Fold = ConstantInt::get(
- getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt()));
+ getContext(), FoldOp(LHSC->getAPInt(), RHSC->getAPInt()));
Ops[0] = getConstant(Fold);
Ops.erase(Ops.begin()+1); // Erase the folded element
if (Ops.size() == 1) return Ops[0];
LHSC = cast<SCEVConstant>(Ops[0]);
}
- // If we are left with a constant minimum-int, strip it off.
- if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
+ bool IsMinV = LHSC->getValue()->isMinValue(IsSigned);
+ bool IsMaxV = LHSC->getValue()->isMaxValue(IsSigned);
+
+ if (IsMax ? IsMinV : IsMaxV) {
+ // If we are left with a constant minimum(/maximum)-int, strip it off.
Ops.erase(Ops.begin());
--Idx;
- } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
- // If we have an smax with a constant maximum-int, it will always be
- // maximum-int.
- return Ops[0];
+ } else if (IsMax ? IsMaxV : IsMinV) {
+ // If we have a max(/min) with a constant maximum(/minimum)-int,
+ // it will always be the extremum.
+ return LHSC;
}
if (Ops.size() == 1) return Ops[0];
}
- // Find the first SMax
- while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
+ // Find the first operation of the same kind
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < Kind)
++Idx;
- // Check to see if one of the operands is an SMax. If so, expand its operands
- // onto our operand list, and recurse to simplify.
+ // Check to see if one of the operands is of the same kind. If so, expand its
+ // operands onto our operand list, and recurse to simplify.
if (Idx < Ops.size()) {
- bool DeletedSMax = false;
- while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
+ bool DeletedAny = false;
+ while (Ops[Idx]->getSCEVType() == Kind) {
+ const SCEVMinMaxExpr *SMME = cast<SCEVMinMaxExpr>(Ops[Idx]);
Ops.erase(Ops.begin()+Idx);
- Ops.append(SMax->op_begin(), SMax->op_end());
- DeletedSMax = true;
+ Ops.append(SMME->op_begin(), SMME->op_end());
+ DeletedAny = true;
}
- if (DeletedSMax)
- return getSMaxExpr(Ops);
+ if (DeletedAny)
+ return getMinMaxExpr(Kind, Ops);
}
// Okay, check to see if the same value occurs in the operand list twice. If
// so, delete one. Since we sorted the list, these values are required to
// be adjacent.
- for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
- // X smax Y smax Y --> X smax Y
- // X smax Y --> X, if X is always greater than Y
- if (Ops[i] == Ops[i+1] ||
- isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
- Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
- --i; --e;
- } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
- Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
- --i; --e;
+ llvm::CmpInst::Predicate GEPred =
+ IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+ llvm::CmpInst::Predicate LEPred =
+ IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+ llvm::CmpInst::Predicate FirstPred = IsMax ? GEPred : LEPred;
+ llvm::CmpInst::Predicate SecondPred = IsMax ? LEPred : GEPred;
+ for (unsigned i = 0, e = Ops.size() - 1; i != e; ++i) {
+ if (Ops[i] == Ops[i + 1] ||
+ isKnownViaNonRecursiveReasoning(FirstPred, Ops[i], Ops[i + 1])) {
+ // X op Y op Y --> X op Y
+ // X op Y --> X, if we know X, Y are ordered appropriately
+ Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2);
+ --i;
+ --e;
+ } else if (isKnownViaNonRecursiveReasoning(SecondPred, Ops[i],
+ Ops[i + 1])) {
+ // X op Y --> Y, if we know X, Y are ordered appropriately
+ Ops.erase(Ops.begin() + i, Ops.begin() + i + 1);
+ --i;
+ --e;
}
+ }
if (Ops.size() == 1) return Ops[0];
assert(!Ops.empty() && "Reduced smax down to nothing!");
- // Okay, it looks like we really DO need an smax expr. Check to see if we
+ // Okay, it looks like we really DO need an expr. Check to see if we
// already have one, otherwise create a new one.
+ const SCEV *ExistingSCEV;
FoldingSetNodeID ID;
- ID.AddInteger(scSMaxExpr);
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- ID.AddPointer(Ops[i]);
- void *IP = nullptr;
- if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ void *IP;
+ std::tie(ExistingSCEV, ID, IP) = findExistingSCEVInCache(Kind, Ops);
+ if (ExistingSCEV)
+ return ExistingSCEV;
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
std::uninitialized_copy(Ops.begin(), Ops.end(), O);
- SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
- O, Ops.size());
+ SCEV *S = new (SCEVAllocator) SCEVMinMaxExpr(
+ ID.Intern(SCEVAllocator), static_cast<SCEVTypes>(Kind), O, Ops.size());
+
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
return S;
}
-const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
- const SCEV *RHS) {
+const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) {
SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
- return getUMaxExpr(Ops);
+ return getSMaxExpr(Ops);
}
-const SCEV *
-ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
- assert(!Ops.empty() && "Cannot get empty umax!");
- if (Ops.size() == 1) return Ops[0];
-#ifndef NDEBUG
- Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
- for (unsigned i = 1, e = Ops.size(); i != e; ++i)
- assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
- "SCEVUMaxExpr operand types don't match!");
-#endif
-
- // Sort by complexity, this groups all similar expression types together.
- GroupByComplexity(Ops, &LI, DT);
-
- // If there are any constants, fold them together.
- unsigned Idx = 0;
- if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
- ++Idx;
- assert(Idx < Ops.size());
- while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
- // We found two constants, fold them together!
- ConstantInt *Fold = ConstantInt::get(
- getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt()));
- Ops[0] = getConstant(Fold);
- Ops.erase(Ops.begin()+1); // Erase the folded element
- if (Ops.size() == 1) return Ops[0];
- LHSC = cast<SCEVConstant>(Ops[0]);
- }
-
- // If we are left with a constant minimum-int, strip it off.
- if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
- Ops.erase(Ops.begin());
- --Idx;
- } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
- // If we have an umax with a constant maximum-int, it will always be
- // maximum-int.
- return Ops[0];
- }
-
- if (Ops.size() == 1) return Ops[0];
- }
-
- // Find the first UMax
- while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
- ++Idx;
-
- // Check to see if one of the operands is a UMax. If so, expand its operands
- // onto our operand list, and recurse to simplify.
- if (Idx < Ops.size()) {
- bool DeletedUMax = false;
- while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
- Ops.erase(Ops.begin()+Idx);
- Ops.append(UMax->op_begin(), UMax->op_end());
- DeletedUMax = true;
- }
-
- if (DeletedUMax)
- return getUMaxExpr(Ops);
- }
-
- // Okay, check to see if the same value occurs in the operand list twice. If
- // so, delete one. Since we sorted the list, these values are required to
- // be adjacent.
- for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
- // X umax Y umax Y --> X umax Y
- // X umax Y --> X, if X is always greater than Y
- if (Ops[i] == Ops[i + 1] || isKnownViaNonRecursiveReasoning(
- ICmpInst::ICMP_UGE, Ops[i], Ops[i + 1])) {
- Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2);
- --i; --e;
- } else if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, Ops[i],
- Ops[i + 1])) {
- Ops.erase(Ops.begin() + i, Ops.begin() + i + 1);
- --i; --e;
- }
-
- if (Ops.size() == 1) return Ops[0];
+const SCEV *ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
+ return getMinMaxExpr(scSMaxExpr, Ops);
+}
- assert(!Ops.empty() && "Reduced umax down to nothing!");
+const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, const SCEV *RHS) {
+ SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
+ return getUMaxExpr(Ops);
+}
- // Okay, it looks like we really DO need a umax expr. Check to see if we
- // already have one, otherwise create a new one.
- FoldingSetNodeID ID;
- ID.AddInteger(scUMaxExpr);
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- ID.AddPointer(Ops[i]);
- void *IP = nullptr;
- if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
- const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
- std::uninitialized_copy(Ops.begin(), Ops.end(), O);
- SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
- O, Ops.size());
- UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
- return S;
+const SCEV *ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
+ return getMinMaxExpr(scUMaxExpr, Ops);
}
const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
@@ -3705,11 +3699,7 @@ const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
}
const SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
- // ~smax(~x, ~y, ~z) == smin(x, y, z).
- SmallVector<const SCEV *, 2> NotOps;
- for (auto *S : Ops)
- NotOps.push_back(getNotSCEV(S));
- return getNotSCEV(getSMaxExpr(NotOps));
+ return getMinMaxExpr(scSMinExpr, Ops);
}
const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
@@ -3719,16 +3709,7 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
}
const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
- assert(!Ops.empty() && "At least one operand must be!");
- // Trivial case.
- if (Ops.size() == 1)
- return Ops[0];
-
- // ~umax(~x, ~y, ~z) == umin(x, y, z).
- SmallVector<const SCEV *, 2> NotOps;
- for (auto *S : Ops)
- NotOps.push_back(getNotSCEV(S));
- return getNotSCEV(getUMaxExpr(NotOps));
+ return getMinMaxExpr(scUMinExpr, Ops);
}
const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
@@ -3892,7 +3873,7 @@ void ScalarEvolution::eraseValueFromMap(Value *V) {
}
/// Check whether value has nuw/nsw/exact set but SCEV does not.
-/// TODO: In reality it is better to check the poison recursevely
+/// TODO: In reality it is better to check for poison recursively
/// but this is better than nothing.
static bool SCEVLostPoisonFlags(const SCEV *S, const Value *V) {
if (auto *I = dyn_cast<Instruction>(V)) {
@@ -3970,12 +3951,45 @@ const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags);
}
+/// If Expr computes ~A, return A; otherwise return nullptr.
+static const SCEV *MatchNotExpr(const SCEV *Expr) {
+ const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
+ if (!Add || Add->getNumOperands() != 2 ||
+ !Add->getOperand(0)->isAllOnesValue())
+ return nullptr;
+
+ const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
+ if (!AddRHS || AddRHS->getNumOperands() != 2 ||
+ !AddRHS->getOperand(0)->isAllOnesValue())
+ return nullptr;
+
+ return AddRHS->getOperand(1);
+}
+
/// Return a SCEV corresponding to ~V = -1-V
const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
return getConstant(
cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
+ // Fold ~(u|s)(min|max)(~x, ~y) to (u|s)(max|min)(x, y)
+ if (const SCEVMinMaxExpr *MME = dyn_cast<SCEVMinMaxExpr>(V)) {
+ auto MatchMinMaxNegation = [&](const SCEVMinMaxExpr *MME) {
+ SmallVector<const SCEV *, 2> MatchedOperands;
+ for (const SCEV *Operand : MME->operands()) {
+ const SCEV *Matched = MatchNotExpr(Operand);
+ if (!Matched)
+ return (const SCEV *)nullptr;
+ MatchedOperands.push_back(Matched);
+ }
+ return getMinMaxExpr(
+ SCEVMinMaxExpr::negate(static_cast<SCEVTypes>(MME->getSCEVType())),
+ MatchedOperands);
+ };
+ if (const SCEV *Replaced = MatchMinMaxNegation(MME))
+ return Replaced;
+ }
+
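The min/max negation fold added to getNotSCEV above rests on the identity ~max(~x, ~y) == min(x, y): since ~a == -1 - a, complement reverses order, so complementing a max of complements yields the min. A quick exhaustive check over small integers:

    #include <algorithm>
    #include <cassert>

    int main() {
      for (int x = -8; x <= 8; ++x)
        for (int y = -8; y <= 8; ++y)
          assert(~std::max(~x, ~y) == std::min(x, y));
      return 0;
    }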
Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
const SCEV *AllOnes =
@@ -4022,29 +4036,28 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags, Depth);
}
-const SCEV *
-ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) {
+const SCEV *ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty,
+ unsigned Depth) {
Type *SrcTy = V->getType();
assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot truncate or zero extend with non-integer arguments!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
- return getTruncateExpr(V, Ty);
- return getZeroExtendExpr(V, Ty);
+ return getTruncateExpr(V, Ty, Depth);
+ return getZeroExtendExpr(V, Ty, Depth);
}
-const SCEV *
-ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
- Type *Ty) {
+const SCEV *ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, Type *Ty,
+ unsigned Depth) {
Type *SrcTy = V->getType();
assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot truncate or zero extend with non-integer arguments!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
- return getTruncateExpr(V, Ty);
- return getSignExtendExpr(V, Ty);
+ return getTruncateExpr(V, Ty, Depth);
+ return getSignExtendExpr(V, Ty, Depth);
}
const SCEV *
@@ -4530,52 +4543,21 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
if (EVI->getNumIndices() != 1 || EVI->getIndices()[0] != 0)
break;
- auto *CI = dyn_cast<CallInst>(EVI->getAggregateOperand());
- if (!CI)
+ auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand());
+ if (!WO)
break;
- if (auto *F = CI->getCalledFunction())
- switch (F->getIntrinsicID()) {
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow:
- if (!isOverflowIntrinsicNoWrap(cast<IntrinsicInst>(CI), DT))
- return BinaryOp(Instruction::Add, CI->getArgOperand(0),
- CI->getArgOperand(1));
-
- // Now that we know that all uses of the arithmetic-result component of
- // CI are guarded by the overflow check, we can go ahead and pretend
- // that the arithmetic is non-overflowing.
- if (F->getIntrinsicID() == Intrinsic::sadd_with_overflow)
- return BinaryOp(Instruction::Add, CI->getArgOperand(0),
- CI->getArgOperand(1), /* IsNSW = */ true,
- /* IsNUW = */ false);
- else
- return BinaryOp(Instruction::Add, CI->getArgOperand(0),
- CI->getArgOperand(1), /* IsNSW = */ false,
- /* IsNUW*/ true);
- case Intrinsic::ssub_with_overflow:
- case Intrinsic::usub_with_overflow:
- if (!isOverflowIntrinsicNoWrap(cast<IntrinsicInst>(CI), DT))
- return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
- CI->getArgOperand(1));
-
- // The same reasoning as sadd/uadd above.
- if (F->getIntrinsicID() == Intrinsic::ssub_with_overflow)
- return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
- CI->getArgOperand(1), /* IsNSW = */ true,
- /* IsNUW = */ false);
- else
- return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
- CI->getArgOperand(1), /* IsNSW = */ false,
- /* IsNUW = */ true);
- case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow:
- return BinaryOp(Instruction::Mul, CI->getArgOperand(0),
- CI->getArgOperand(1));
- default:
- break;
- }
- break;
+ Instruction::BinaryOps BinOp = WO->getBinaryOp();
+ bool Signed = WO->isSigned();
+ // TODO: Should add nuw/nsw flags for mul as well.
+ if (BinOp == Instruction::Mul || !isOverflowIntrinsicNoWrap(WO, DT))
+ return BinaryOp(BinOp, WO->getLHS(), WO->getRHS());
+
+ // Now that we know that all uses of the arithmetic-result component of
+ // WO are guarded by the overflow check, we can go ahead and pretend
+ // that the arithmetic is non-overflowing.
+ return BinaryOp(BinOp, WO->getLHS(), WO->getRHS(),
+ /* IsNSW = */ Signed, /* IsNUW = */ !Signed);
}
default:
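The rewritten extractvalue case leans on the (then recently added) WithOverflowInst helper, which models the *.with.overflow intrinsic family uniformly instead of requiring one switch case per intrinsic ID. A hedged sketch of the API surface used here:

    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    bool describeOverflowOp(const Value *V) {
      if (auto *WO = dyn_cast<WithOverflowInst>(V)) {
        Instruction::BinaryOps Op = WO->getBinaryOp(); // Add, Sub or Mul
        bool Signed = WO->isSigned();                  // s* vs. u* variant
        (void)Op; (void)Signed;                        // used by the caller
        return true;
      }
      return false;
    }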
@@ -5009,7 +4991,7 @@ const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN,
// overflow.
if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
- (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
+ (void)getAddRecExpr(getAddExpr(StartVal, Accum, Flags), Accum, L, Flags);
return PHISCEV;
}
@@ -5196,6 +5178,8 @@ static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
switch (S->getSCEVType()) {
case scConstant: case scTruncate: case scZeroExtend: case scSignExtend:
case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr:
+ case scUMinExpr:
+ case scSMinExpr:
// These expressions are available if their operand(s) is/are.
return true;
@@ -5551,6 +5535,9 @@ ScalarEvolution::getRangeRef(const SCEV *S,
DenseMap<const SCEV *, ConstantRange> &Cache =
SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
: SignedRanges;
+ ConstantRange::PreferredRangeType RangeType =
+ SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED
+ ? ConstantRange::Unsigned : ConstantRange::Signed;
// See if we've computed this range already.
DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S);
@@ -5581,53 +5568,60 @@ ScalarEvolution::getRangeRef(const SCEV *S,
ConstantRange X = getRangeRef(Add->getOperand(0), SignHint);
for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
X = X.add(getRangeRef(Add->getOperand(i), SignHint));
- return setRange(Add, SignHint, ConservativeResult.intersectWith(X));
+ return setRange(Add, SignHint,
+ ConservativeResult.intersectWith(X, RangeType));
}
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
ConstantRange X = getRangeRef(Mul->getOperand(0), SignHint);
for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
X = X.multiply(getRangeRef(Mul->getOperand(i), SignHint));
- return setRange(Mul, SignHint, ConservativeResult.intersectWith(X));
+ return setRange(Mul, SignHint,
+ ConservativeResult.intersectWith(X, RangeType));
}
if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
ConstantRange X = getRangeRef(SMax->getOperand(0), SignHint);
for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
X = X.smax(getRangeRef(SMax->getOperand(i), SignHint));
- return setRange(SMax, SignHint, ConservativeResult.intersectWith(X));
+ return setRange(SMax, SignHint,
+ ConservativeResult.intersectWith(X, RangeType));
}
if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
ConstantRange X = getRangeRef(UMax->getOperand(0), SignHint);
for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
X = X.umax(getRangeRef(UMax->getOperand(i), SignHint));
- return setRange(UMax, SignHint, ConservativeResult.intersectWith(X));
+ return setRange(UMax, SignHint,
+ ConservativeResult.intersectWith(X, RangeType));
}
if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint);
ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint);
return setRange(UDiv, SignHint,
- ConservativeResult.intersectWith(X.udiv(Y)));
+ ConservativeResult.intersectWith(X.udiv(Y), RangeType));
}
if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
ConstantRange X = getRangeRef(ZExt->getOperand(), SignHint);
return setRange(ZExt, SignHint,
- ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
+ ConservativeResult.intersectWith(X.zeroExtend(BitWidth),
+ RangeType));
}
if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
ConstantRange X = getRangeRef(SExt->getOperand(), SignHint);
return setRange(SExt, SignHint,
- ConservativeResult.intersectWith(X.signExtend(BitWidth)));
+ ConservativeResult.intersectWith(X.signExtend(BitWidth),
+ RangeType));
}
if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
ConstantRange X = getRangeRef(Trunc->getOperand(), SignHint);
return setRange(Trunc, SignHint,
- ConservativeResult.intersectWith(X.truncate(BitWidth)));
+ ConservativeResult.intersectWith(X.truncate(BitWidth),
+ RangeType));
}
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
@@ -5637,7 +5631,7 @@ ScalarEvolution::getRangeRef(const SCEV *S,
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
if (!C->getValue()->isZero())
ConservativeResult = ConservativeResult.intersectWith(
- ConstantRange(C->getAPInt(), APInt(BitWidth, 0)));
+ ConstantRange(C->getAPInt(), APInt(BitWidth, 0)), RangeType);
// If there's no signed wrap, and all the operands have the same sign or
// zero, the value won't ever change sign.
@@ -5651,11 +5645,11 @@ ScalarEvolution::getRangeRef(const SCEV *S,
if (AllNonNeg)
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange(APInt(BitWidth, 0),
- APInt::getSignedMinValue(BitWidth)));
+ APInt::getSignedMinValue(BitWidth)), RangeType);
else if (AllNonPos)
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange(APInt::getSignedMinValue(BitWidth),
- APInt(BitWidth, 1)));
+ APInt(BitWidth, 1)), RangeType);
}
// TODO: non-affine addrec
@@ -5668,14 +5662,14 @@ ScalarEvolution::getRangeRef(const SCEV *S,
BitWidth);
if (!RangeFromAffine.isFullSet())
ConservativeResult =
- ConservativeResult.intersectWith(RangeFromAffine);
+ ConservativeResult.intersectWith(RangeFromAffine, RangeType);
auto RangeFromFactoring = getRangeViaFactoring(
AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount,
BitWidth);
if (!RangeFromFactoring.isFullSet())
ConservativeResult =
- ConservativeResult.intersectWith(RangeFromFactoring);
+ ConservativeResult.intersectWith(RangeFromFactoring, RangeType);
}
}
@@ -5686,7 +5680,8 @@ ScalarEvolution::getRangeRef(const SCEV *S,
// Check if the IR explicitly contains !range metadata.
Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
if (MDRange.hasValue())
- ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
+ ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue(),
+ RangeType);
// Split here to avoid paying the compile-time cost of calling both
// computeKnownBits and ComputeNumSignBits. This restriction can be lifted
@@ -5697,8 +5692,8 @@ ScalarEvolution::getRangeRef(const SCEV *S,
KnownBits Known = computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
if (Known.One != ~Known.Zero + 1)
ConservativeResult =
- ConservativeResult.intersectWith(ConstantRange(Known.One,
- ~Known.Zero + 1));
+ ConservativeResult.intersectWith(
+ ConstantRange(Known.One, ~Known.Zero + 1), RangeType);
} else {
assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
"generalize as needed!");
@@ -5706,7 +5701,8 @@ ScalarEvolution::getRangeRef(const SCEV *S,
if (NS > 1)
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
- APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1));
+ APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1),
+ RangeType);
}
// A range of Phi is a subset of union of all ranges of its input.
@@ -5721,7 +5717,8 @@ ScalarEvolution::getRangeRef(const SCEV *S,
if (RangeFromOps.isFullSet())
break;
}
- ConservativeResult = ConservativeResult.intersectWith(RangeFromOps);
+ ConservativeResult =
+ ConservativeResult.intersectWith(RangeFromOps, RangeType);
bool Erased = PendingPhiRanges.erase(Phi);
assert(Erased && "Failed to erase Phi properly?");
(void) Erased;
@@ -5751,7 +5748,7 @@ static ConstantRange getRangeForAffineARHelper(APInt Step,
// FullRange), then we don't know anything about the final range either.
// Return FullRange.
if (StartRange.isFullSet())
- return ConstantRange(BitWidth, /* isFullSet = */ true);
+ return ConstantRange::getFull(BitWidth);
// If Step is signed and negative, then we use its absolute value, but we also
// note that we're moving in the opposite direction.
@@ -5767,7 +5764,7 @@ static ConstantRange getRangeForAffineARHelper(APInt Step,
// Check if Offset is more than full span of BitWidth. If it is, the
// expression is guaranteed to overflow.
if (APInt::getMaxValue(StartRange.getBitWidth()).udiv(Step).ult(MaxBECount))
- return ConstantRange(BitWidth, /* isFullSet = */ true);
+ return ConstantRange::getFull(BitWidth);
// Offset is by how much the expression can change. Checks above guarantee no
// overflow here.
@@ -5786,7 +5783,7 @@ static ConstantRange getRangeForAffineARHelper(APInt Step,
// range (due to wrap around). This means that the expression can take any
// value in this bitwidth, and we have to return full range.
if (StartRange.contains(MovedBoundary))
- return ConstantRange(BitWidth, /* isFullSet = */ true);
+ return ConstantRange::getFull(BitWidth);
APInt NewLower =
Descending ? std::move(MovedBoundary) : std::move(StartLower);
@@ -5794,12 +5791,8 @@ static ConstantRange getRangeForAffineARHelper(APInt Step,
Descending ? std::move(StartUpper) : std::move(MovedBoundary);
NewUpper += 1;
- // If we end up with full range, return a proper full range.
- if (NewLower == NewUpper)
- return ConstantRange(BitWidth, /* isFullSet = */ true);
-
// No overflow detected, return [StartLower, StartUpper + Offset + 1) range.
- return ConstantRange(std::move(NewLower), std::move(NewUpper));
+ return ConstantRange::getNonEmpty(std::move(NewLower), std::move(NewUpper));
}
ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
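A minimal sketch (not part of the patch) of the two new ConstantRange
factories used above; getNonEmpty makes the deleted wrap-around check
redundant by canonicalizing Lower == Upper to the full set:

    #include "llvm/IR/ConstantRange.h"
    #include <cassert>
    using namespace llvm;

    ConstantRange demoFactories() {
      // getFull(BW) replaces ConstantRange(BW, /*isFullSet=*/true).
      ConstantRange Full = ConstantRange::getFull(32);
      assert(Full.isFullSet());
      // getNonEmpty folds Lower == Upper into the full set, so callers no
      // longer need the explicit check that was removed above.
      ConstantRange R = ConstantRange::getNonEmpty(APInt(32, 5), APInt(32, 5));
      assert(R.isFullSet());
      return R;
    }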
@@ -5832,7 +5825,7 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
MaxBECountValue, BitWidth, /* Signed = */ false);
// Finally, intersect signed and unsigned ranges.
- return SR.intersectWith(UR);
+ return SR.intersectWith(UR, ConstantRange::Smallest);
}
ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
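The new second argument to intersectWith is a range-type preference
(ConstantRange::Smallest, Unsigned, or Signed): when the exact intersection
of two possibly-wrapped ranges is not a single contiguous range, the
preference selects which enclosing range to return. A hedged call sketch:

    // Sketch only: Smallest asks for the tightest single superset of the
    // exact intersection; Unsigned/Signed prefer a result that does not
    // wrap in the respective domain.
    ConstantRange intersectSmallest(const ConstantRange &SR,
                                    const ConstantRange &UR) {
      return SR.intersectWith(UR, ConstantRange::Smallest);
    }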
@@ -5916,17 +5909,17 @@ ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
SelectPattern StartPattern(*this, BitWidth, Start);
if (!StartPattern.isRecognized())
- return ConstantRange(BitWidth, /* isFullSet = */ true);
+ return ConstantRange::getFull(BitWidth);
SelectPattern StepPattern(*this, BitWidth, Step);
if (!StepPattern.isRecognized())
- return ConstantRange(BitWidth, /* isFullSet = */ true);
+ return ConstantRange::getFull(BitWidth);
if (StartPattern.Condition != StepPattern.Condition) {
// We don't handle this case today; but we could, by considering four
// possibilities below instead of two. I'm not sure if there are cases where
// that will help over what getRange already does, though.
- return ConstantRange(BitWidth, /* isFullSet = */ true);
+ return ConstantRange::getFull(BitWidth);
}
// NB! Calling ScalarEvolution::getConstant is fine, but we should not try to
@@ -6128,7 +6121,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// to obey basic rules for definitions dominating uses which this
// analysis depends on.
if (!DT.isReachableFromEntry(I->getParent()))
- return getUnknown(V);
+ return getUnknown(UndefValue::get(V->getType()));
} else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
return getConstant(CI);
else if (isa<ConstantPointerNull>(V))
@@ -6744,6 +6737,28 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
return BackedgeTakenCounts.find(L)->second = std::move(Result);
}
+void ScalarEvolution::forgetAllLoops() {
+ // This method is intended to forget all info about loops. It should
+ // invalidate caches as if the following happened:
+ // - The trip counts of all loops have changed arbitrarily
+ // - Every llvm::Value has been updated in place to produce a different
+ // result.
+ BackedgeTakenCounts.clear();
+ PredicatedBackedgeTakenCounts.clear();
+ LoopPropertiesCache.clear();
+ ConstantEvolutionLoopExitValue.clear();
+ ValueExprMap.clear();
+ ValuesAtScopes.clear();
+ LoopDispositions.clear();
+ BlockDispositions.clear();
+ UnsignedRanges.clear();
+ SignedRanges.clear();
+ ExprValueMap.clear();
+ HasRecMap.clear();
+ MinTrailingZerosCache.clear();
+ PredicatedSCEVRewrites.clear();
+}
+
void ScalarEvolution::forgetLoop(const Loop *L) {
// Drop any stored trip count value.
auto RemoveLoopFromBackedgeMap =
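forgetAllLoops gives passes that invalidate SCEV wholesale a cheaper
alternative to one forgetLoop call per loop. A hypothetical client sketch
(mutateLoop is an assumed helper, not an LLVM API):

    void mutateLoop(Loop *L); // assumed: may change any trip count

    void rewriteLoops(LoopInfo &LI, ScalarEvolution &SE) {
      for (Loop *L : LI)
        mutateLoop(L);
      // One call drops every cached trip count, range, and disposition.
      SE.forgetAllLoops();
    }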
@@ -6972,8 +6987,8 @@ ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M,
/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
/// computable exit into a persistent ExitNotTakenInfo array.
ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
- SmallVectorImpl<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo>
- &&ExitCounts,
+ ArrayRef<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo> ExitCounts,
bool Complete, const SCEV *MaxCount, bool MaxOrZero)
: MaxAndComplete(MaxCount, Complete), MaxOrZero(MaxOrZero) {
using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo;
@@ -7256,6 +7271,14 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
if (EL0.ExactNotTaken == EL1.ExactNotTaken)
BECount = EL0.ExactNotTaken;
}
+ // There are cases (e.g. PR26207) where computeExitLimitFromCond is able
+ // to be more aggressive when computing BECount than when computing
+ // MaxBECount. In these cases it is possible for EL0.ExactNotTaken and
+ // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken
+ // to not.
+ if (isa<SCEVCouldNotCompute>(MaxBECount) &&
+ !isa<SCEVCouldNotCompute>(BECount))
+ MaxBECount = getConstant(getUnsignedRangeMax(BECount));
return ExitLimit(BECount, MaxBECount, false,
{&EL0.Predicates, &EL1.Predicates});
@@ -7651,7 +7674,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
static bool CanConstantFold(const Instruction *I) {
if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
- isa<LoadInst>(I))
+ isa<LoadInst>(I) || isa<ExtractValueInst>(I))
return true;
if (const CallInst *CI = dyn_cast<CallInst>(I))
@@ -8075,7 +8098,9 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
}
case scSMaxExpr:
case scUMaxExpr:
- break; // TODO: smax, umax.
+ case scSMinExpr:
+ case scUMinExpr:
+ break; // TODO: smax, umax, smin, umin.
}
return nullptr;
}
@@ -8087,44 +8112,64 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
// exit value from the loop without using SCEVs.
if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
- const Loop *LI = this->LI[I->getParent()];
- if (LI && LI->getParentLoop() == L) // Looking for loop exit value.
- if (PHINode *PN = dyn_cast<PHINode>(I))
- if (PN->getParent() == LI->getHeader()) {
- // Okay, there is no closed form solution for the PHI node. Check
- // to see if the loop that contains it has a known backedge-taken
- // count. If so, we may be able to force computation of the exit
- // value.
- const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
- if (const SCEVConstant *BTCC =
- dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
-
- // This trivial case can show up in some degenerate cases where
- // the incoming IR has not yet been fully simplified.
- if (BTCC->getValue()->isZero()) {
- Value *InitValue = nullptr;
- bool MultipleInitValues = false;
- for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
- if (!LI->contains(PN->getIncomingBlock(i))) {
- if (!InitValue)
- InitValue = PN->getIncomingValue(i);
- else if (InitValue != PN->getIncomingValue(i)) {
- MultipleInitValues = true;
- break;
- }
- }
- if (!MultipleInitValues && InitValue)
- return getSCEV(InitValue);
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ const Loop *LI = this->LI[I->getParent()];
+ // Looking for loop exit value.
+ if (LI && LI->getParentLoop() == L &&
+ PN->getParent() == LI->getHeader()) {
+ // Okay, there is no closed form solution for the PHI node. Check
+ // to see if the loop that contains it has a known backedge-taken
+ // count. If so, we may be able to force computation of the exit
+ // value.
+ const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
+ // This trivial case can show up in some degenerate cases where
+ // the incoming IR has not yet been fully simplified.
+ if (BackedgeTakenCount->isZero()) {
+ Value *InitValue = nullptr;
+ bool MultipleInitValues = false;
+ for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
+ if (!LI->contains(PN->getIncomingBlock(i))) {
+ if (!InitValue)
+ InitValue = PN->getIncomingValue(i);
+ else if (InitValue != PN->getIncomingValue(i)) {
+ MultipleInitValues = true;
+ break;
}
}
- // Okay, we know how many times the containing loop executes. If
- // this is a constant evolving PHI node, get the final value at
- // the specified iteration number.
- Constant *RV =
- getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI);
- if (RV) return getSCEV(RV);
}
+ if (!MultipleInitValues && InitValue)
+ return getSCEV(InitValue);
}
+ // Do we have a loop invariant value flowing around the backedge
+ // for a loop which must execute the backedge?
+ if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
+ isKnownPositive(BackedgeTakenCount) &&
+ PN->getNumIncomingValues() == 2) {
+ unsigned InLoopPred = LI->contains(PN->getIncomingBlock(0)) ? 0 : 1;
+ const SCEV *OnBackedge = getSCEV(PN->getIncomingValue(InLoopPred));
+ if (IsAvailableOnEntry(LI, DT, OnBackedge, PN->getParent()))
+ return OnBackedge;
+ }
+ if (auto *BTCC = dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
+ // Okay, we know how many times the containing loop executes. If
+ // this is a constant evolving PHI node, get the final value at
+ // the specified iteration number.
+ Constant *RV =
+ getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI);
+ if (RV) return getSCEV(RV);
+ }
+ }
+
+ // If there is a single-input Phi, evaluate it at our scope. If we can
+ // prove that this replacement does not break LCSSA form, use the new value.
+ if (PN->getNumOperands() == 1) {
+ const SCEV *Input = getSCEV(PN->getOperand(0));
+ const SCEV *InputAtScope = getSCEVAtScope(Input, L);
+ // TODO: We can generalize this using LI.replacementPreservesLCSSAForm;
+ // for now, handle only the simplest case: constants.
+ if (isa<SCEVConstant>(InputAtScope)) return InputAtScope;
+ }
+ }
// Okay, this is an expression that we cannot symbolically evaluate
// into a SCEV. Check to see if it's possible to symbolically evaluate
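The canonical shape for the new single-input phi case is an LCSSA exit phi;
a sketch of the situation in IR, shown as comments (block and value names
assumed):

    // exit:                                   ; reached only from %loop
    //   %lcssa = phi i32 [ %iv.next, %loop ]  ; exactly one incoming value
    //
    // When getSCEVAtScope(%iv.next, L) folds to a SCEVConstant, the code
    // above returns that constant for %lcssa instead of an opaque
    // SCEVUnknown, without breaking LCSSA form.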
@@ -8198,13 +8243,11 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
NewOps.push_back(OpAtScope);
}
if (isa<SCEVAddExpr>(Comm))
- return getAddExpr(NewOps);
+ return getAddExpr(NewOps, Comm->getNoWrapFlags());
if (isa<SCEVMulExpr>(Comm))
- return getMulExpr(NewOps);
- if (isa<SCEVSMaxExpr>(Comm))
- return getSMaxExpr(NewOps);
- if (isa<SCEVUMaxExpr>(Comm))
- return getUMaxExpr(NewOps);
+ return getMulExpr(NewOps, Comm->getNoWrapFlags());
+ if (isa<SCEVMinMaxExpr>(Comm))
+ return getMinMaxExpr(Comm->getSCEVType(), NewOps);
llvm_unreachable("Unknown commutative SCEV type!");
}
}
@@ -10045,41 +10088,15 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
getNotSCEV(FoundLHS));
}
-/// If Expr computes ~A, return A else return nullptr
-static const SCEV *MatchNotExpr(const SCEV *Expr) {
- const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
- if (!Add || Add->getNumOperands() != 2 ||
- !Add->getOperand(0)->isAllOnesValue())
- return nullptr;
-
- const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
- if (!AddRHS || AddRHS->getNumOperands() != 2 ||
- !AddRHS->getOperand(0)->isAllOnesValue())
- return nullptr;
-
- return AddRHS->getOperand(1);
-}
-
-/// Is MaybeMaxExpr an SMax or UMax of Candidate and some other values?
-template<typename MaxExprType>
-static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr,
- const SCEV *Candidate) {
- const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr);
- if (!MaxExpr) return false;
-
- return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end();
-}
-
-/// Is MaybeMinExpr an SMin or UMin of Candidate and some other values?
-template<typename MaxExprType>
-static bool IsMinConsistingOf(ScalarEvolution &SE,
- const SCEV *MaybeMinExpr,
- const SCEV *Candidate) {
- const SCEV *MaybeMaxExpr = MatchNotExpr(MaybeMinExpr);
- if (!MaybeMaxExpr)
+/// Is MaybeMinMaxExpr an (U|S)(Min|Max) of Candidate and some other values?
+template <typename MinMaxExprType>
+static bool IsMinMaxConsistingOf(const SCEV *MaybeMinMaxExpr,
+ const SCEV *Candidate) {
+ const MinMaxExprType *MinMaxExpr = dyn_cast<MinMaxExprType>(MaybeMinMaxExpr);
+ if (!MinMaxExpr)
return false;
- return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate));
+ return find(MinMaxExpr->operands(), Candidate) != MinMaxExpr->op_end();
}
static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE,
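With smin/umin now first-class SCEV nodes, one template replaces the old
MatchNotExpr-based min detection. A hedged sketch of how the callers below
combine the two directions (helper name assumed):

    static bool impliesSLE(const SCEV *LHS, const SCEV *RHS) {
      // min(A, ...) <= A, or A <= max(A, ...): either pattern proves SLE.
      return IsMinMaxConsistingOf<SCEVSMinExpr>(LHS, RHS) ||
             IsMinMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
    }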
@@ -10128,20 +10145,20 @@ static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SLE:
return
- // min(A, ...) <= A
- IsMinConsistingOf<SCEVSMaxExpr>(SE, LHS, RHS) ||
- // A <= max(A, ...)
- IsMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
+ // min(A, ...) <= A
+ IsMinMaxConsistingOf<SCEVSMinExpr>(LHS, RHS) ||
+ // A <= max(A, ...)
+ IsMinMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
case ICmpInst::ICMP_UGE:
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_ULE:
return
- // min(A, ...) <= A
- IsMinConsistingOf<SCEVUMaxExpr>(SE, LHS, RHS) ||
- // A <= max(A, ...)
- IsMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
+ // min(A, ...) <= A
+ IsMinMaxConsistingOf<SCEVUMinExpr>(LHS, RHS) ||
+ // A <= max(A, ...)
+ IsMinMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
}
llvm_unreachable("covered switch fell through?!");
@@ -10691,13 +10708,10 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
IsSigned ? APIntOps::smax(getSignedRangeMin(RHS), Limit)
: APIntOps::umax(getUnsignedRangeMin(RHS), Limit);
-
- const SCEV *MaxBECount = getCouldNotCompute();
- if (isa<SCEVConstant>(BECount))
- MaxBECount = BECount;
- else
- MaxBECount = computeBECount(getConstant(MaxStart - MinEnd),
- getConstant(MinStride), false);
+ const SCEV *MaxBECount = isa<SCEVConstant>(BECount)
+ ? BECount
+ : computeBECount(getConstant(MaxStart - MinEnd),
+ getConstant(MinStride), false);
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
@@ -10806,8 +10820,6 @@ static inline bool containsUndefs(const SCEV *S) {
return SCEVExprContains(S, [](const SCEV *S) {
if (const auto *SU = dyn_cast<SCEVUnknown>(S))
return isa<UndefValue>(SU->getValue());
- else if (const auto *SC = dyn_cast<SCEVConstant>(S))
- return isa<UndefValue>(SC->getValue());
return false;
});
}
@@ -11402,19 +11414,23 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": ";
- SmallVector<BasicBlock *, 8> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
- if (ExitBlocks.size() != 1)
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ if (ExitingBlocks.size() != 1)
OS << "<multiple exits> ";
- if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
- OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L);
- } else {
- OS << "Unpredictable backedge-taken count. ";
- }
+ if (SE->hasLoopInvariantBackedgeTakenCount(L))
+ OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L) << "\n";
+ else
+ OS << "Unpredictable backedge-taken count.\n";
- OS << "\n"
- "Loop ";
+ if (ExitingBlocks.size() > 1)
+ for (BasicBlock *ExitingBlock : ExitingBlocks) {
+ OS << " exit count for " << ExitingBlock->getName() << ": "
+ << *SE->getExitCount(L, ExitingBlock) << "\n";
+ }
+
+ OS << "Loop ";
L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": ";
@@ -11611,7 +11627,9 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
case scAddExpr:
case scMulExpr:
case scUMaxExpr:
- case scSMaxExpr: {
+ case scSMaxExpr:
+ case scUMinExpr:
+ case scSMinExpr: {
bool HasVarying = false;
for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
LoopDisposition D = getLoopDisposition(Op, L);
@@ -11698,7 +11716,9 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
case scAddExpr:
case scMulExpr:
case scUMaxExpr:
- case scSMaxExpr: {
+ case scSMaxExpr:
+ case scUMinExpr:
+ case scSMinExpr: {
const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
bool Proper = true;
for (const SCEV *NAryOp : NAry->operands()) {
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 289d4f8ae49a..96da0a24cddd 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -1,9 +1,8 @@
//===- ScalarEvolutionAliasAnalysis.cpp - SCEV-based Alias Analysis -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,7 +22,7 @@
using namespace llvm;
AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+ const MemoryLocation &LocB, AAQueryInfo &AAQI) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are. This allows the code below to ignore this special
// case.
@@ -86,11 +85,12 @@ AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
AO ? AAMDNodes() : LocA.AATags),
MemoryLocation(BO ? BO : LocB.Ptr,
BO ? LocationSize::unknown() : LocB.Size,
- BO ? AAMDNodes() : LocB.AATags)) == NoAlias)
+ BO ? AAMDNodes() : LocB.AATags),
+ AAQI) == NoAlias)
return NoAlias;
// Forward the query to the next analysis.
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
}
/// Given an expression, try to find a base value.
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index ca5cf1663b83..e8a95d35482c 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1,9 +1,8 @@
//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -61,12 +60,10 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
// instructions that might be inserted before BIP.
if (BasicBlock::iterator(CI) != IP || BIP == IP) {
// Create a new cast, and leave the old cast in place in case
- // it is being used as an insert point. Clear its operand
- // so that it doesn't hold anything live.
+ // it is being used as an insert point.
Ret = CastInst::Create(Op, V, Ty, "", &*IP);
Ret->takeName(CI);
CI->replaceAllUsesWith(Ret);
- CI->setOperand(0, UndefValue::get(V->getType()));
break;
}
Ret = CI;
@@ -167,9 +164,11 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
}
/// InsertBinop - Insert the specified binary operator, doing a small amount
-/// of work to avoid inserting an obviously redundant operation.
+/// of work to avoid inserting an obviously redundant operation, and hoisting
+/// to an outer loop when the opportunity is there and it is safe.
Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
- Value *LHS, Value *RHS) {
+ Value *LHS, Value *RHS,
+ SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
// Fold a binop with constant operands.
if (Constant *CLHS = dyn_cast<Constant>(LHS))
if (Constant *CRHS = dyn_cast<Constant>(RHS))
@@ -188,20 +187,22 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
if (isa<DbgInfoIntrinsic>(IP))
ScanLimit++;
- // Conservatively, do not use any instruction which has any of wrap/exact
- // flags installed.
- // TODO: Instead of simply disable poison instructions we can be clever
- // here and match SCEV to this instruction.
- auto canGeneratePoison = [](Instruction *I) {
- if (isa<OverflowingBinaryOperator>(I) &&
- (I->hasNoSignedWrap() || I->hasNoUnsignedWrap()))
- return true;
+ auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
+ // Ensure that no-wrap flags match.
+ if (isa<OverflowingBinaryOperator>(I)) {
+ if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
+ return true;
+ if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
+ return true;
+ }
+ // Conservatively, do not reuse any instruction that has the exact
+ // flag set.
if (isa<PossiblyExactOperator>(I) && I->isExact())
return true;
return false;
};
if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
- IP->getOperand(1) == RHS && !canGeneratePoison(&*IP))
+ IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP))
return &*IP;
if (IP == BlockBegin) break;
}
@@ -211,19 +212,25 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
SCEVInsertPointGuard Guard(Builder, this);
- // Move the insertion point out of as many loops as we can.
- while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
- if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader) break;
+ if (IsSafeToHoist) {
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
- // Ok, move up a level.
- Builder.SetInsertPoint(Preheader->getTerminator());
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader->getTerminator());
+ }
}
// If we haven't found this binop, insert it.
Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
BO->setDebugLoc(Loc);
+ if (Flags & SCEV::FlagNUW)
+ BO->setHasNoUnsignedWrap();
+ if (Flags & SCEV::FlagNSW)
+ BO->setHasNoSignedWrap();
rememberInstruction(BO);
return BO;
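The reuse check is stricter because a cached instruction's poison contract
must match the SCEV being materialized. A sketch of the hazard
canGenerateIncompatiblePoison prevents (IR in comments, names assumed):

    // Existing instruction at the scan point:
    //   %t = add nuw i32 %a, %b     ; poison whenever a + b wraps unsigned
    // SCEV being expanded: (%a + %b) with no no-wrap guarantee.
    // Reusing %t would turn a well-defined wrapping add into poison, so the
    // expander now reuses an instruction only when its nuw/nsw flags match
    // Flags exactly, and conversely sets those flags on freshly created ops.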
@@ -695,7 +702,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
// Sort by loop. Use a stable sort so that constants follow non-constants and
// pointer operands precede non-pointer operands.
- std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));
+ llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
// Emit instructions to add all the operands. Hoist as much as possible
// out of loops, and form meaningful getelementptrs where possible.
@@ -735,7 +742,8 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
// Instead of doing a negate and add, just do a subtract.
Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
Sum = InsertNoopCastOfTo(Sum, Ty);
- Sum = InsertBinop(Instruction::Sub, Sum, W);
+ Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
+ /*IsSafeToHoist*/ true);
++I;
} else {
// A simple add.
@@ -743,7 +751,8 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
Sum = InsertNoopCastOfTo(Sum, Ty);
// Canonicalize a constant to the RHS.
if (isa<Constant>(Sum)) std::swap(Sum, W);
- Sum = InsertBinop(Instruction::Add, Sum, W);
+ Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(),
+ /*IsSafeToHoist*/ true);
++I;
}
}
@@ -762,7 +771,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
// Sort by loop. Use a stable sort so that constants follow non-constants.
- std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));
+ llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
// Emit instructions to mul all the operands. Hoist as much as possible
// out of loops.
@@ -795,9 +804,13 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
if (Exponent & 1)
Result = P;
for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
- P = InsertBinop(Instruction::Mul, P, P);
+ P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap,
+ /*IsSafeToHoist*/ true);
if (Exponent & BinExp)
- Result = Result ? InsertBinop(Instruction::Mul, Result, P) : P;
+ Result = Result ? InsertBinop(Instruction::Mul, Result, P,
+ SCEV::FlagAnyWrap,
+ /*IsSafeToHoist*/ true)
+ : P;
}
I = E;
@@ -812,7 +825,8 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
} else if (I->second->isAllOnesValue()) {
// Instead of doing a multiply by negative one, just do a negate.
Prod = InsertNoopCastOfTo(Prod, Ty);
- Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
+ Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
+ SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
++I;
} else {
// A simple mul.
@@ -824,10 +838,16 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
if (match(W, m_Power2(RHS))) {
// Canonicalize Prod*(1<<C) to Prod<<C.
assert(!Ty->isVectorTy() && "vector types are not SCEVable");
+ auto NWFlags = S->getNoWrapFlags();
+ // Clear the nsw flag if the shl would produce a poison value.
+ if (RHS->logBase2() == RHS->getBitWidth() - 1)
+ NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW);
Prod = InsertBinop(Instruction::Shl, Prod,
- ConstantInt::get(Ty, RHS->logBase2()));
+ ConstantInt::get(Ty, RHS->logBase2()), NWFlags,
+ /*IsSafeToHoist*/ true);
} else {
- Prod = InsertBinop(Instruction::Mul, Prod, W);
+ Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(),
+ /*IsSafeToHoist*/ true);
}
}
}
@@ -843,11 +863,13 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
const APInt &RHS = SC->getAPInt();
if (RHS.isPowerOf2())
return InsertBinop(Instruction::LShr, LHS,
- ConstantInt::get(Ty, RHS.logBase2()));
+ ConstantInt::get(Ty, RHS.logBase2()),
+ SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
}
Value *RHS = expandCodeFor(S->getRHS(), Ty);
- return InsertBinop(Instruction::UDiv, LHS, RHS);
+ return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
+ /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
}
/// Move parts of Base into Rest to leave Base with the minimal
@@ -1634,7 +1656,8 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
for (int i = S->getNumOperands()-2; i >= 0; --i) {
// In the case of mixed integer and pointer types, do the
// rest of the comparisons as integer.
- if (S->getOperand(i)->getType() != Ty) {
+ Type *OpTy = S->getOperand(i)->getType();
+ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
Ty = SE.getEffectiveSCEVType(Ty);
LHS = InsertNoopCastOfTo(LHS, Ty);
}
@@ -1658,7 +1681,8 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
for (int i = S->getNumOperands()-2; i >= 0; --i) {
// In the case of mixed integer and pointer types, do the
// rest of the comparisons as integer.
- if (S->getOperand(i)->getType() != Ty) {
+ Type *OpTy = S->getOperand(i)->getType();
+ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
Ty = SE.getEffectiveSCEVType(Ty);
LHS = InsertNoopCastOfTo(LHS, Ty);
}
@@ -1676,6 +1700,56 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
return LHS;
}
+Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
+ Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands() - 2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ Type *OpTy = S->getOperand(i)->getType();
+ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
+ Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *ICmp = Builder.CreateICmpSLT(LHS, RHS);
+ rememberInstruction(ICmp);
+ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin");
+ rememberInstruction(Sel);
+ LHS = Sel;
+ }
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
+ return LHS;
+}
+
+Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
+ Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands() - 2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ Type *OpTy = S->getOperand(i)->getType();
+ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
+ Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *ICmp = Builder.CreateICmpULT(LHS, RHS);
+ rememberInstruction(ICmp);
+ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin");
+ rememberInstruction(Sel);
+ LHS = Sel;
+ }
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
+ return LHS;
+}
+
Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
Instruction *IP) {
setInsertPoint(IP);
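The new min visitors mirror the max ones, lowering to icmp/select chains.
For umin(x, y, z) the emitted IR is roughly as follows (value names assumed;
the visitor walks operands last-to-first):

    //   %c1   = icmp ult i32 %z, %y
    //   %umin = select i1 %c1, i32 %z, i32 %y
    //   %c2   = icmp ult i32 %umin, %x
    //   %res  = select i1 %c2, i32 %umin, i32 %x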
@@ -1732,49 +1806,55 @@ Value *SCEVExpander::expand(const SCEV *S) {
// Compute an insertion point for this SCEV object. Hoist the instructions
// as far out in the loop nest as possible.
Instruction *InsertPt = &*Builder.GetInsertPoint();
- for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
- L = L->getParentLoop())
- if (SE.isLoopInvariant(S, L)) {
- if (!L) break;
- if (BasicBlock *Preheader = L->getLoopPreheader())
- InsertPt = Preheader->getTerminator();
- else {
- // LSR sets the insertion point for AddRec start/step values to the
- // block start to simplify value reuse, even though it's an invalid
- // position. SCEVExpander must correct for this in all cases.
- InsertPt = &*L->getHeader()->getFirstInsertionPt();
- }
- } else {
- // We can move insertion point only if there is no div or rem operations
- // otherwise we are risky to move it over the check for zero denominator.
- auto SafeToHoist = [](const SCEV *S) {
- return !SCEVExprContains(S, [](const SCEV *S) {
- if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
- if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
- // Division by non-zero constants can be hoisted.
- return SC->getValue()->isZero();
- // All other divisions should not be moved as they may be
- // divisions by zero and should be kept within the
- // conditions of the surrounding loops that guard their
- // execution (see PR35406).
- return true;
- }
- return false;
- });
- };
- // If the SCEV is computable at this level, insert it into the header
- // after the PHIs (and after any other instructions that we've inserted
- // there) so that it is guaranteed to dominate any user inside the loop.
- if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L) &&
- SafeToHoist(S))
- InsertPt = &*L->getHeader()->getFirstInsertionPt();
- while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
- (isInsertedInstruction(InsertPt) ||
- isa<DbgInfoIntrinsic>(InsertPt))) {
- InsertPt = &*std::next(InsertPt->getIterator());
+
+ // We can move the insertion point only if there are no div or rem
+ // operations; otherwise we risk hoisting it past the check for a zero
+ // denominator.
+ auto SafeToHoist = [](const SCEV *S) {
+ return !SCEVExprContains(S, [](const SCEV *S) {
+ if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
+ if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
+ // Division by non-zero constants can be hoisted.
+ return SC->getValue()->isZero();
+ // All other divisions should not be moved as they may be
+ // divisions by zero and should be kept within the
+ // conditions of the surrounding loops that guard their
+ // execution (see PR35406).
+ return true;
+ }
+ return false;
+ });
+ };
+ if (SafeToHoist(S)) {
+ for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
+ L = L->getParentLoop()) {
+ if (SE.isLoopInvariant(S, L)) {
+ if (!L) break;
+ if (BasicBlock *Preheader = L->getLoopPreheader())
+ InsertPt = Preheader->getTerminator();
+ else
+ // LSR sets the insertion point for AddRec start/step values to the
+ // block start to simplify value reuse, even though it's an invalid
+ // position. SCEVExpander must correct for this in all cases.
+ InsertPt = &*L->getHeader()->getFirstInsertionPt();
+ } else {
+ // If the SCEV is computable at this level, insert it into the header
+ // after the PHIs (and after any other instructions that we've inserted
+ // there) so that it is guaranteed to dominate any user inside the loop.
+ if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
+ InsertPt = &*L->getHeader()->getFirstInsertionPt();
+ while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
+ (isInsertedInstruction(InsertPt) ||
+ isa<DbgInfoIntrinsic>(InsertPt)))
+ InsertPt = &*std::next(InsertPt->getIterator());
+ break;
}
- break;
}
+ }
+
+ // IndVarSimplify sometimes sets the insertion point at the block start, even
+ // when there are PHIs at that point. We must correct for this.
+ if (isa<PHINode>(*InsertPt))
+ InsertPt = &*InsertPt->getParent()->getFirstInsertionPt();
// Check to see if we already expanded this here.
auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
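Gating the loop-invariance hoisting on SafeToHoist closes the same
PR35406-style hole for this path: a division reached only under a guard must
not be evaluated unconditionally in a preheader. A C-level sketch of the
hazard (shapes assumed):

    int sumGuardedDiv(const int *xs, int n, int d) {
      int sum = 0;
      for (int i = 0; i < n; ++i)
        if (d != 0)          // guard that keeps the division defined
          sum += xs[i] / d;  // hoisting this above the guard is UB if d == 0
      return sum;
    }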
@@ -2071,10 +2151,13 @@ bool SCEVExpander::isHighCostExpansionHelper(
if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {
// If the divisor is a power of two and the SCEV type fits in a native
- // integer, consider the division cheap irrespective of whether it occurs in
- // the user code since it can be lowered into a right shift.
+ // integer (and the LHS not expensive), consider the division cheap
+ // irrespective of whether it occurs in the user code since it can be
+ // lowered into a right shift.
if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS()))
if (SC->getAPInt().isPowerOf2()) {
+ if (isHighCostExpansionHelper(UDivExpr->getLHS(), L, At, Processed))
+ return true;
const DataLayout &DL =
L->getHeader()->getParent()->getParent()->getDataLayout();
unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth();
@@ -2102,7 +2185,7 @@ bool SCEVExpander::isHighCostExpansionHelper(
// HowManyLessThans uses a Max expression whenever the loop is not guarded by
// the exit condition.
- if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
+ if (isa<SCEVMinMaxExpr>(S))
return true;
// Recurse past nary expressions, which commonly occur in the
@@ -2339,6 +2422,24 @@ bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) {
bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint,
ScalarEvolution &SE) {
- return isSafeToExpand(S, SE) && SE.dominates(S, InsertionPoint->getParent());
+ if (!isSafeToExpand(S, SE))
+ return false;
+ // We have to prove that the expanded site of S dominates InsertionPoint.
+ // This is easy when not in the same block, but hard when S is an instruction
+ // to be expanded somewhere inside the same block as our insertion point.
+ // What we really need here is something analogous to an OrderedBasicBlock,
+ // but for the moment, we paper over the problem by handling two common and
+ // cheap to check cases.
+ if (SE.properlyDominates(S, InsertionPoint->getParent()))
+ return true;
+ if (SE.dominates(S, InsertionPoint->getParent())) {
+ if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
+ return true;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
+ for (const Value *V : InsertionPoint->operand_values())
+ if (V == U->getValue())
+ return true;
+ }
+ return false;
}
}
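A hedged usage sketch of the strengthened query; the surrounding setup
(S, InsertPt, SE, DL) is assumed:

    // Expand S at InsertPt only once both legality and dominance of the
    // expansion site are established by isSafeToExpandAt.
    if (isSafeToExpandAt(S, InsertPt, SE)) {
      SCEVExpander Expander(SE, DL, "scev.expand");
      Value *V = Expander.expandCodeFor(S, S->getType(), InsertPt);
      // ... use V ...
    }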
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index 3740039b8f86..209ae66ca53e 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -1,9 +1,8 @@
//===- ScalarEvolutionNormalization.cpp - See below -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/ScopedNoAliasAA.cpp b/lib/Analysis/ScopedNoAliasAA.cpp
index 9a581fe46afc..094e4a3d5dc8 100644
--- a/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/lib/Analysis/ScopedNoAliasAA.cpp
@@ -1,9 +1,8 @@
//===- ScopedNoAliasAA.cpp - Scoped No-Alias Alias Analysis ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -76,9 +75,10 @@ public:
} // end anonymous namespace
AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+ const MemoryLocation &LocB,
+ AAQueryInfo &AAQI) {
if (!EnableScopedNoAlias)
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
// Get the attached MDNodes.
const MDNode *AScopes = LocA.AATags.Scope, *BScopes = LocB.AATags.Scope;
@@ -92,13 +92,14 @@ AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA,
return NoAlias;
// If they may alias, chain to the next AliasAnalysis.
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
}
ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call,
- const MemoryLocation &Loc) {
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
if (!EnableScopedNoAlias)
- return AAResultBase::getModRefInfo(Call, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc, AAQI);
if (!mayAliasInScopes(Loc.AATags.Scope,
Call->getMetadata(LLVMContext::MD_noalias)))
@@ -108,13 +109,14 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call,
Loc.AATags.NoAlias))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(Call, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc, AAQI);
}
ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call1,
- const CallBase *Call2) {
+ const CallBase *Call2,
+ AAQueryInfo &AAQI) {
if (!EnableScopedNoAlias)
- return AAResultBase::getModRefInfo(Call1, Call2);
+ return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
if (!mayAliasInScopes(Call1->getMetadata(LLVMContext::MD_alias_scope),
Call2->getMetadata(LLVMContext::MD_noalias)))
@@ -124,7 +126,7 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call1,
Call1->getMetadata(LLVMContext::MD_noalias)))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(Call1, Call2);
+ return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
}
static void collectMDInDomain(const MDNode *List, const MDNode *Domain,
diff --git a/lib/Analysis/StackSafetyAnalysis.cpp b/lib/Analysis/StackSafetyAnalysis.cpp
index 66b03845864f..4cf235db86eb 100644
--- a/lib/Analysis/StackSafetyAnalysis.cpp
+++ b/lib/Analysis/StackSafetyAnalysis.cpp
@@ -1,9 +1,8 @@
//===- StackSafetyAnalysis.cpp - Stack memory safety analysis -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -416,7 +415,9 @@ class StackSafetyDataFlowAnalysis {
updateOneNode(F.first, F.second);
}
void runDataFlow();
+#ifndef NDEBUG
void verifyFixedPoint();
+#endif
public:
StackSafetyDataFlowAnalysis(
@@ -527,11 +528,13 @@ void StackSafetyDataFlowAnalysis::runDataFlow() {
}
}
+#ifndef NDEBUG
void StackSafetyDataFlowAnalysis::verifyFixedPoint() {
WorkList.clear();
updateAllNodes();
assert(WorkList.empty());
}
+#endif
StackSafetyGlobalInfo StackSafetyDataFlowAnalysis::run() {
runDataFlow();
diff --git a/lib/Analysis/StratifiedSets.h b/lib/Analysis/StratifiedSets.h
index 2f20cd12506c..60ea2451b0ef 100644
--- a/lib/Analysis/StratifiedSets.h
+++ b/lib/Analysis/StratifiedSets.h
@@ -1,9 +1,8 @@
//===- StratifiedSets.h - Abstract stratified sets implementation. --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/SyncDependenceAnalysis.cpp b/lib/Analysis/SyncDependenceAnalysis.cpp
index e1a7e4476d12..3cf248a31142 100644
--- a/lib/Analysis/SyncDependenceAnalysis.cpp
+++ b/lib/Analysis/SyncDependenceAnalysis.cpp
@@ -1,10 +1,9 @@
//===- SyncDependenceAnalysis.cpp - Divergent Branch Dependence Calculation
//--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -219,14 +218,9 @@ struct DivergencePropagator {
template <typename SuccessorIterable>
std::unique_ptr<ConstBlockSet>
computeJoinPoints(const BasicBlock &RootBlock,
- SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
+ SuccessorIterable NodeSuccessors, const Loop *ParentLoop,
+ const BasicBlock *PdBoundBlock) {
assert(JoinBlocks);
- // immediate post dominator (no join block beyond that block)
- const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(&RootBlock));
- const auto *IpdNode = PdNode->getIDom();
- const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
-
// bootstrap with branch targets
for (const auto *SuccBlock : NodeSuccessors) {
DefMap.emplace(SuccBlock, SuccBlock);
@@ -341,13 +335,23 @@ const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) {
// already available in cache?
auto ItCached = CachedLoopExitJoins.find(&Loop);
- if (ItCached != CachedLoopExitJoins.end())
+ if (ItCached != CachedLoopExitJoins.end()) {
return *ItCached->second;
+ }
+
+ // don't propagate beyond the immediate post dominator of the loop
+ const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Loop.getHeader()));
+ const auto *IpdNode = PdNode->getIDom();
+ const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
+ while (PdBoundBlock && Loop.contains(PdBoundBlock)) {
+ IpdNode = IpdNode->getIDom();
+ PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
+ }
// compute all join points
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
- *Loop.getHeader(), LoopExits, Loop.getParentLoop());
+ *Loop.getHeader(), LoopExits, Loop.getParentLoop(), PdBoundBlock);
auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
assert(ItInserted.second);
@@ -366,11 +370,16 @@ SyncDependenceAnalysis::join_blocks(const Instruction &Term) {
if (ItCached != CachedBranchJoins.end())
return *ItCached->second;
+ // don't propagate beyond the immediate post dominator of the branch
+ const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Term.getParent()));
+ const auto *IpdNode = PdNode->getIDom();
+ const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
+
// compute all join points
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
const auto &TermBlock = *Term.getParent();
auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>(
- TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
+ TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock),
+ PdBoundBlock);
auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
assert(ItInserted.second);
diff --git a/lib/Analysis/SyntheticCountsUtils.cpp b/lib/Analysis/SyntheticCountsUtils.cpp
index c2d7bb11a4cf..22766e5f07f5 100644
--- a/lib/Analysis/SyntheticCountsUtils.cpp
+++ b/lib/Analysis/SyntheticCountsUtils.cpp
@@ -1,9 +1,8 @@
//===--- SyntheticCountsUtils.cpp - synthetic counts propagation utils ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp
index 4643f75da42d..ef139d3257d2 100644
--- a/lib/Analysis/TargetLibraryInfo.cpp
+++ b/lib/Analysis/TargetLibraryInfo.cpp
@@ -1,9 +1,8 @@
//===-- TargetLibraryInfo.cpp - Runtime library information ----------------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -24,6 +23,8 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
"No vector functions library"),
clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate",
"Accelerate framework"),
+ clEnumValN(TargetLibraryInfoImpl::MASSV, "MASSV",
+ "IBM MASS vector library"),
clEnumValN(TargetLibraryInfoImpl::SVML, "SVML",
"Intel SVML library")));
@@ -50,6 +51,16 @@ static bool hasSinCosPiStret(const Triple &T) {
return true;
}
+static bool hasBcmp(const Triple &TT) {
+ // POSIX removed bcmp() in 2001, but glibc and several other libc
+ // implementations still provide it.
+ if (TT.isOSLinux())
+ return TT.isGNUEnvironment() || TT.isMusl();
+ // Both NetBSD and OpenBSD are planning to remove the function. Windows does
+ // not have it.
+ return TT.isOSFreeBSD() || TT.isOSSolaris() || TT.isOSDarwin();
+}
+
/// Initialize the set of available library functions based on the specified
/// target triple. This should be carefully written so that a missing target
/// triple gets a sane set of defaults.
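Transforms consult TargetLibraryInfo before forming a bcmp call, so the
triple-sensitive availability above is the single gate. A minimal sketch:

    // Sketch: memcmp(a, b, n) == 0 may be rewritten to bcmp(a, b, n) == 0,
    // but only when the target's TLI says bcmp exists.
    static bool canFormBcmp(const TargetLibraryInfo &TLI) {
      return TLI.has(LibFunc_bcmp);
    }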
@@ -78,8 +89,8 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
ShouldSignExtI32Param = false;
// PowerPC64, Sparc64, SystemZ need signext/zeroext on i32 parameters and
// returns corresponding to C-level ints and unsigned ints.
- if (T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le ||
- T.getArch() == Triple::sparcv9 || T.getArch() == Triple::systemz) {
+ if (T.isPPC64() || T.getArch() == Triple::sparcv9 ||
+ T.getArch() == Triple::systemz) {
ShouldExtI32Param = true;
ShouldExtI32Return = true;
}
@@ -142,6 +153,9 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_sincospif_stret);
}
+ if (!hasBcmp(T))
+ TLI.setUnavailable(LibFunc_bcmp);
+
if (T.isMacOSX() && T.getArch() == Triple::x86 &&
!T.isMacOSXVersionLT(10, 7)) {
// x86-32 OSX has a scheme where fwrite and fputs (and some other functions
@@ -153,33 +167,82 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setAvailableWithName(LibFunc_fputs, "fputs$UNIX2003");
}
- // iprintf and friends are only available on XCore and TCE.
- if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce) {
+ // iprintf and friends are only available on XCore, TCE, and Emscripten.
+ if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce &&
+ T.getOS() != Triple::Emscripten) {
TLI.setUnavailable(LibFunc_iprintf);
TLI.setUnavailable(LibFunc_siprintf);
TLI.setUnavailable(LibFunc_fiprintf);
}
+ // __small_printf and friends are only available on Emscripten.
+ if (T.getOS() != Triple::Emscripten) {
+ TLI.setUnavailable(LibFunc_small_printf);
+ TLI.setUnavailable(LibFunc_small_sprintf);
+ TLI.setUnavailable(LibFunc_small_fprintf);
+ }
+
if (T.isOSWindows() && !T.isOSCygMing()) {
- // Win32 does not support long double
+ // XXX: The earliest documentation available at the moment is for VS2015/VC19:
+ // https://docs.microsoft.com/en-us/cpp/c-runtime-library/floating-point-support?view=vs-2015
+ // XXX: In order to use an MSVCRT older than VC19,
+ // the specific library version must be explicit in the target triple,
+ // e.g., x86_64-pc-windows-msvc18.
+ bool hasPartialC99 = true;
+ if (T.isKnownWindowsMSVCEnvironment()) {
+ unsigned Major, Minor, Micro;
+ T.getEnvironmentVersion(Major, Minor, Micro);
+ hasPartialC99 = (Major == 0 || Major >= 19);
+ }
+
+ // Newer targets support the float variants of the C89 math functions,
+ // at least in part.
+ bool isARM = (T.getArch() == Triple::aarch64 ||
+ T.getArch() == Triple::arm);
+ bool hasPartialFloat = (isARM ||
+ T.getArch() == Triple::x86_64);
+
+ // Win32 does not support float C89 math functions, in general.
+ if (!hasPartialFloat) {
+ TLI.setUnavailable(LibFunc_acosf);
+ TLI.setUnavailable(LibFunc_asinf);
+ TLI.setUnavailable(LibFunc_atan2f);
+ TLI.setUnavailable(LibFunc_atanf);
+ TLI.setUnavailable(LibFunc_ceilf);
+ TLI.setUnavailable(LibFunc_cosf);
+ TLI.setUnavailable(LibFunc_coshf);
+ TLI.setUnavailable(LibFunc_expf);
+ TLI.setUnavailable(LibFunc_floorf);
+ TLI.setUnavailable(LibFunc_fmodf);
+ TLI.setUnavailable(LibFunc_log10f);
+ TLI.setUnavailable(LibFunc_logf);
+ TLI.setUnavailable(LibFunc_modff);
+ TLI.setUnavailable(LibFunc_powf);
+ TLI.setUnavailable(LibFunc_sinf);
+ TLI.setUnavailable(LibFunc_sinhf);
+ TLI.setUnavailable(LibFunc_sqrtf);
+ TLI.setUnavailable(LibFunc_tanf);
+ TLI.setUnavailable(LibFunc_tanhf);
+ }
+ if (!isARM)
+ TLI.setUnavailable(LibFunc_fabsf);
+ TLI.setUnavailable(LibFunc_frexpf);
+ TLI.setUnavailable(LibFunc_ldexpf);
+
+ // Win32 does not support long double C89 math functions.
TLI.setUnavailable(LibFunc_acosl);
TLI.setUnavailable(LibFunc_asinl);
- TLI.setUnavailable(LibFunc_atanl);
TLI.setUnavailable(LibFunc_atan2l);
+ TLI.setUnavailable(LibFunc_atanl);
TLI.setUnavailable(LibFunc_ceill);
- TLI.setUnavailable(LibFunc_copysignl);
TLI.setUnavailable(LibFunc_cosl);
TLI.setUnavailable(LibFunc_coshl);
TLI.setUnavailable(LibFunc_expl);
- TLI.setUnavailable(LibFunc_fabsf); // Win32 and Win64 both lack fabsf
TLI.setUnavailable(LibFunc_fabsl);
TLI.setUnavailable(LibFunc_floorl);
- TLI.setUnavailable(LibFunc_fmaxl);
- TLI.setUnavailable(LibFunc_fminl);
TLI.setUnavailable(LibFunc_fmodl);
TLI.setUnavailable(LibFunc_frexpl);
- TLI.setUnavailable(LibFunc_ldexpf);
TLI.setUnavailable(LibFunc_ldexpl);
+ TLI.setUnavailable(LibFunc_log10l);
TLI.setUnavailable(LibFunc_logl);
TLI.setUnavailable(LibFunc_modfl);
TLI.setUnavailable(LibFunc_powl);
@@ -189,81 +252,66 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_tanl);
TLI.setUnavailable(LibFunc_tanhl);
- // Win32 only has C89 math
- TLI.setUnavailable(LibFunc_acosh);
- TLI.setUnavailable(LibFunc_acoshf);
+ // Win32 does not fully support C99 math functions.
+ if (!hasPartialC99) {
+ TLI.setUnavailable(LibFunc_acosh);
+ TLI.setUnavailable(LibFunc_acoshf);
+ TLI.setUnavailable(LibFunc_asinh);
+ TLI.setUnavailable(LibFunc_asinhf);
+ TLI.setUnavailable(LibFunc_atanh);
+ TLI.setUnavailable(LibFunc_atanhf);
+ TLI.setAvailableWithName(LibFunc_cabs, "_cabs");
+ TLI.setUnavailable(LibFunc_cabsf);
+ TLI.setUnavailable(LibFunc_cbrt);
+ TLI.setUnavailable(LibFunc_cbrtf);
+ TLI.setAvailableWithName(LibFunc_copysign, "_copysign");
+ TLI.setAvailableWithName(LibFunc_copysignf, "_copysignf");
+ TLI.setUnavailable(LibFunc_exp2);
+ TLI.setUnavailable(LibFunc_exp2f);
+ TLI.setUnavailable(LibFunc_expm1);
+ TLI.setUnavailable(LibFunc_expm1f);
+ TLI.setUnavailable(LibFunc_fmax);
+ TLI.setUnavailable(LibFunc_fmaxf);
+ TLI.setUnavailable(LibFunc_fmin);
+ TLI.setUnavailable(LibFunc_fminf);
+ TLI.setUnavailable(LibFunc_log1p);
+ TLI.setUnavailable(LibFunc_log1pf);
+ TLI.setUnavailable(LibFunc_log2);
+ TLI.setUnavailable(LibFunc_log2f);
+ TLI.setAvailableWithName(LibFunc_logb, "_logb");
+ if (hasPartialFloat)
+ TLI.setAvailableWithName(LibFunc_logbf, "_logbf");
+ else
+ TLI.setUnavailable(LibFunc_logbf);
+ TLI.setUnavailable(LibFunc_rint);
+ TLI.setUnavailable(LibFunc_rintf);
+ TLI.setUnavailable(LibFunc_round);
+ TLI.setUnavailable(LibFunc_roundf);
+ TLI.setUnavailable(LibFunc_trunc);
+ TLI.setUnavailable(LibFunc_truncf);
+ }
+
+ // Win32 does not support long double C99 math functions.
TLI.setUnavailable(LibFunc_acoshl);
- TLI.setUnavailable(LibFunc_asinh);
- TLI.setUnavailable(LibFunc_asinhf);
TLI.setUnavailable(LibFunc_asinhl);
- TLI.setUnavailable(LibFunc_atanh);
- TLI.setUnavailable(LibFunc_atanhf);
TLI.setUnavailable(LibFunc_atanhl);
- TLI.setUnavailable(LibFunc_cabs);
- TLI.setUnavailable(LibFunc_cabsf);
TLI.setUnavailable(LibFunc_cabsl);
- TLI.setUnavailable(LibFunc_cbrt);
- TLI.setUnavailable(LibFunc_cbrtf);
TLI.setUnavailable(LibFunc_cbrtl);
- TLI.setUnavailable(LibFunc_exp2);
- TLI.setUnavailable(LibFunc_exp2f);
+ TLI.setUnavailable(LibFunc_copysignl);
TLI.setUnavailable(LibFunc_exp2l);
- TLI.setUnavailable(LibFunc_expm1);
- TLI.setUnavailable(LibFunc_expm1f);
TLI.setUnavailable(LibFunc_expm1l);
- TLI.setUnavailable(LibFunc_log2);
- TLI.setUnavailable(LibFunc_log2f);
- TLI.setUnavailable(LibFunc_log2l);
- TLI.setUnavailable(LibFunc_log1p);
- TLI.setUnavailable(LibFunc_log1pf);
+ TLI.setUnavailable(LibFunc_fmaxl);
+ TLI.setUnavailable(LibFunc_fminl);
TLI.setUnavailable(LibFunc_log1pl);
- TLI.setUnavailable(LibFunc_logb);
- TLI.setUnavailable(LibFunc_logbf);
+ TLI.setUnavailable(LibFunc_log2l);
TLI.setUnavailable(LibFunc_logbl);
- TLI.setUnavailable(LibFunc_nearbyint);
- TLI.setUnavailable(LibFunc_nearbyintf);
TLI.setUnavailable(LibFunc_nearbyintl);
- TLI.setUnavailable(LibFunc_rint);
- TLI.setUnavailable(LibFunc_rintf);
TLI.setUnavailable(LibFunc_rintl);
- TLI.setUnavailable(LibFunc_round);
- TLI.setUnavailable(LibFunc_roundf);
TLI.setUnavailable(LibFunc_roundl);
- TLI.setUnavailable(LibFunc_trunc);
- TLI.setUnavailable(LibFunc_truncf);
TLI.setUnavailable(LibFunc_truncl);
- // Win32 provides some C99 math with mangled names
- TLI.setAvailableWithName(LibFunc_copysign, "_copysign");
-
- if (T.getArch() == Triple::x86) {
- // Win32 on x86 implements single-precision math functions as macros
- TLI.setUnavailable(LibFunc_acosf);
- TLI.setUnavailable(LibFunc_asinf);
- TLI.setUnavailable(LibFunc_atanf);
- TLI.setUnavailable(LibFunc_atan2f);
- TLI.setUnavailable(LibFunc_ceilf);
- TLI.setUnavailable(LibFunc_copysignf);
- TLI.setUnavailable(LibFunc_cosf);
- TLI.setUnavailable(LibFunc_coshf);
- TLI.setUnavailable(LibFunc_expf);
- TLI.setUnavailable(LibFunc_floorf);
- TLI.setUnavailable(LibFunc_fminf);
- TLI.setUnavailable(LibFunc_fmaxf);
- TLI.setUnavailable(LibFunc_fmodf);
- TLI.setUnavailable(LibFunc_logf);
- TLI.setUnavailable(LibFunc_log10f);
- TLI.setUnavailable(LibFunc_modff);
- TLI.setUnavailable(LibFunc_powf);
- TLI.setUnavailable(LibFunc_sinf);
- TLI.setUnavailable(LibFunc_sinhf);
- TLI.setUnavailable(LibFunc_sqrtf);
- TLI.setUnavailable(LibFunc_tanf);
- TLI.setUnavailable(LibFunc_tanhf);
- }
-
- // Win32 does *not* provide these functions, but they are
- // generally available on POSIX-compliant systems:
+ // Win32 does not support these functions, but
+ // they are generally available on POSIX-compliant systems.
TLI.setUnavailable(LibFunc_access);
TLI.setUnavailable(LibFunc_bcmp);
TLI.setUnavailable(LibFunc_bcopy);
@@ -318,12 +366,6 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_utime);
TLI.setUnavailable(LibFunc_utimes);
TLI.setUnavailable(LibFunc_write);
-
- // Win32 does *not* provide provide these functions, but they are
- // specified by C99:
- TLI.setUnavailable(LibFunc_atoll);
- TLI.setUnavailable(LibFunc_frexpf);
- TLI.setUnavailable(LibFunc_llabs);
}
switch (T.getOS()) {
@@ -651,11 +693,21 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return ((NumParams == 2 || NumParams == 3) &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
+ case LibFunc_strcat_chk:
+ --NumParams;
+ if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ return false;
+ LLVM_FALLTHROUGH;
case LibFunc_strcat:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0) == FTy.getReturnType() &&
FTy.getParamType(1) == FTy.getReturnType());
+ case LibFunc_strncat_chk:
+ --NumParams;
+ if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ return false;
+ LLVM_FALLTHROUGH;
case LibFunc_strncat:
return (NumParams == 3 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0) == FTy.getReturnType() &&
@@ -674,6 +726,19 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
FTy.getParamType(0) == FTy.getParamType(1) &&
FTy.getParamType(0) == PCharTy);
+ case LibFunc_strlcat_chk:
+ case LibFunc_strlcpy_chk:
+ --NumParams;
+ if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ return false;
+ LLVM_FALLTHROUGH;
+ case LibFunc_strlcat:
+ case LibFunc_strlcpy:
+ return NumParams == 3 && IsSizeTTy(FTy.getReturnType()) &&
+ FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isPointerTy() &&
+ IsSizeTTy(FTy.getParamType(2));
+
case LibFunc_strncpy_chk:
case LibFunc_stpncpy_chk:
--NumParams;
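
All of the new _chk cases follow the same shape: peel off the trailing object-size argument, check that it is size_t, then fall through to the unfortified prototype check. A minimal sketch of that shared step, with a hypothetical helper name not present in the patch:

    // Hypothetical helper capturing the shared _chk step: __strcat_chk(dst,
    // src, dstsize) is the strcat prototype plus one trailing size_t, so peel
    // that off and then validate against the plain strcat signature.
    static bool peelChkSizeParam(const FunctionType &FTy, unsigned &NumParams,
                                 llvm::function_ref<bool(Type *)> IsSizeTTy) {
      if (NumParams == 0)
        return false;
      --NumParams;
      return IsSizeTTy(FTy.getParamType(NumParams));
    }
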
@@ -739,14 +804,32 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_stat:
case LibFunc_statvfs:
case LibFunc_siprintf:
+ case LibFunc_small_sprintf:
case LibFunc_sprintf:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy() &&
FTy.getReturnType()->isIntegerTy(32));
+
+ case LibFunc_sprintf_chk:
+ return NumParams == 4 && FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isIntegerTy(32) &&
+ IsSizeTTy(FTy.getParamType(2)) &&
+ FTy.getParamType(3)->isPointerTy() &&
+ FTy.getReturnType()->isIntegerTy(32);
+
case LibFunc_snprintf:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy() &&
FTy.getReturnType()->isIntegerTy(32));
+
+ case LibFunc_snprintf_chk:
+ return NumParams == 5 && FTy.getParamType(0)->isPointerTy() &&
+ IsSizeTTy(FTy.getParamType(1)) &&
+ FTy.getParamType(2)->isIntegerTy(32) &&
+ IsSizeTTy(FTy.getParamType(3)) &&
+ FTy.getParamType(4)->isPointerTy() &&
+ FTy.getReturnType()->isIntegerTy(32);
+
case LibFunc_setitimer:
return (NumParams == 3 && FTy.getParamType(1)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
@@ -795,6 +878,11 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
FTy.getParamType(1)->isIntegerTy() &&
IsSizeTTy(FTy.getParamType(2)));
+ case LibFunc_memccpy_chk:
+ --NumParams;
+ if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ return false;
+ LLVM_FALLTHROUGH;
case LibFunc_memccpy:
return (NumParams >= 2 && FTy.getParamType(1)->isPointerTy());
case LibFunc_memalign:
@@ -836,6 +924,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_getenv:
case LibFunc_getpwnam:
case LibFunc_iprintf:
+ case LibFunc_small_printf:
case LibFunc_pclose:
case LibFunc_perror:
case LibFunc_printf:
@@ -915,6 +1004,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
FTy.getParamType(1)->isPointerTy());
case LibFunc_fscanf:
case LibFunc_fiprintf:
+ case LibFunc_small_fprintf:
case LibFunc_fprintf:
return (NumParams >= 2 && FTy.getReturnType()->isIntegerTy() &&
FTy.getParamType(0)->isPointerTy() &&
@@ -961,9 +1051,17 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_vsprintf:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
+ case LibFunc_vsprintf_chk:
+ return NumParams == 5 && FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isIntegerTy(32) &&
+ IsSizeTTy(FTy.getParamType(2)) && FTy.getParamType(3)->isPointerTy();
case LibFunc_vsnprintf:
return (NumParams == 4 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
+ case LibFunc_vsnprintf_chk:
+ return NumParams == 6 && FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(2)->isIntegerTy(32) &&
+ IsSizeTTy(FTy.getParamType(3)) && FTy.getParamType(4)->isPointerTy();
case LibFunc_open:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy());
case LibFunc_opendir:
@@ -1391,6 +1489,11 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
bool TargetLibraryInfoImpl::getLibFunc(const Function &FDecl,
LibFunc &F) const {
+  // Intrinsics don't overlap with libcalls; if our module has a large number
+  // of intrinsics, this early exit is a meaningful compile-time win since we
+  // avoid string normalization and comparison.
+ if (FDecl.isIntrinsic()) return false;
+
const DataLayout *DL =
FDecl.getParent() ? &FDecl.getParent()->getDataLayout() : nullptr;
return getLibFunc(FDecl.getName(), F) &&
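
For context, a sketch of the call-site pattern getLibFunc serves; TLI here is the public TargetLibraryInfo wrapper, and CB and simplifyLibCall are hypothetical names:

    LibFunc TheLibFunc;
    if (const Function *Callee = CB->getCalledFunction())
      // With the new early exit, llvm.* declarations return before any name
      // normalization or binary search over the libfunc name table.
      if (TLI.getLibFunc(*Callee, TheLibFunc) && TLI.has(TheLibFunc))
        simplifyLibCall(*CB, TheLibFunc);  // hypothetical consumer
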
@@ -1430,151 +1533,24 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
switch (VecLib) {
case Accelerate: {
const VecDesc VecFuncs[] = {
- // Floating-Point Arithmetic and Auxiliary Functions
- {"ceilf", "vceilf", 4},
- {"fabsf", "vfabsf", 4},
- {"llvm.fabs.f32", "vfabsf", 4},
- {"floorf", "vfloorf", 4},
- {"sqrtf", "vsqrtf", 4},
- {"llvm.sqrt.f32", "vsqrtf", 4},
-
- // Exponential and Logarithmic Functions
- {"expf", "vexpf", 4},
- {"llvm.exp.f32", "vexpf", 4},
- {"expm1f", "vexpm1f", 4},
- {"logf", "vlogf", 4},
- {"llvm.log.f32", "vlogf", 4},
- {"log1pf", "vlog1pf", 4},
- {"log10f", "vlog10f", 4},
- {"llvm.log10.f32", "vlog10f", 4},
- {"logbf", "vlogbf", 4},
-
- // Trigonometric Functions
- {"sinf", "vsinf", 4},
- {"llvm.sin.f32", "vsinf", 4},
- {"cosf", "vcosf", 4},
- {"llvm.cos.f32", "vcosf", 4},
- {"tanf", "vtanf", 4},
- {"asinf", "vasinf", 4},
- {"acosf", "vacosf", 4},
- {"atanf", "vatanf", 4},
-
- // Hyperbolic Functions
- {"sinhf", "vsinhf", 4},
- {"coshf", "vcoshf", 4},
- {"tanhf", "vtanhf", 4},
- {"asinhf", "vasinhf", 4},
- {"acoshf", "vacoshf", 4},
- {"atanhf", "vatanhf", 4},
+ #define TLI_DEFINE_ACCELERATE_VECFUNCS
+ #include "llvm/Analysis/VecFuncs.def"
+ };
+ addVectorizableFunctions(VecFuncs);
+ break;
+ }
+ case MASSV: {
+ const VecDesc VecFuncs[] = {
+ #define TLI_DEFINE_MASSV_VECFUNCS
+ #include "llvm/Analysis/VecFuncs.def"
};
addVectorizableFunctions(VecFuncs);
break;
}
case SVML: {
const VecDesc VecFuncs[] = {
- {"sin", "__svml_sin2", 2},
- {"sin", "__svml_sin4", 4},
- {"sin", "__svml_sin8", 8},
-
- {"sinf", "__svml_sinf4", 4},
- {"sinf", "__svml_sinf8", 8},
- {"sinf", "__svml_sinf16", 16},
-
- {"llvm.sin.f64", "__svml_sin2", 2},
- {"llvm.sin.f64", "__svml_sin4", 4},
- {"llvm.sin.f64", "__svml_sin8", 8},
-
- {"llvm.sin.f32", "__svml_sinf4", 4},
- {"llvm.sin.f32", "__svml_sinf8", 8},
- {"llvm.sin.f32", "__svml_sinf16", 16},
-
- {"cos", "__svml_cos2", 2},
- {"cos", "__svml_cos4", 4},
- {"cos", "__svml_cos8", 8},
-
- {"cosf", "__svml_cosf4", 4},
- {"cosf", "__svml_cosf8", 8},
- {"cosf", "__svml_cosf16", 16},
-
- {"llvm.cos.f64", "__svml_cos2", 2},
- {"llvm.cos.f64", "__svml_cos4", 4},
- {"llvm.cos.f64", "__svml_cos8", 8},
-
- {"llvm.cos.f32", "__svml_cosf4", 4},
- {"llvm.cos.f32", "__svml_cosf8", 8},
- {"llvm.cos.f32", "__svml_cosf16", 16},
-
- {"pow", "__svml_pow2", 2},
- {"pow", "__svml_pow4", 4},
- {"pow", "__svml_pow8", 8},
-
- {"powf", "__svml_powf4", 4},
- {"powf", "__svml_powf8", 8},
- {"powf", "__svml_powf16", 16},
-
- { "__pow_finite", "__svml_pow2", 2 },
- { "__pow_finite", "__svml_pow4", 4 },
- { "__pow_finite", "__svml_pow8", 8 },
-
- { "__powf_finite", "__svml_powf4", 4 },
- { "__powf_finite", "__svml_powf8", 8 },
- { "__powf_finite", "__svml_powf16", 16 },
-
- {"llvm.pow.f64", "__svml_pow2", 2},
- {"llvm.pow.f64", "__svml_pow4", 4},
- {"llvm.pow.f64", "__svml_pow8", 8},
-
- {"llvm.pow.f32", "__svml_powf4", 4},
- {"llvm.pow.f32", "__svml_powf8", 8},
- {"llvm.pow.f32", "__svml_powf16", 16},
-
- {"exp", "__svml_exp2", 2},
- {"exp", "__svml_exp4", 4},
- {"exp", "__svml_exp8", 8},
-
- {"expf", "__svml_expf4", 4},
- {"expf", "__svml_expf8", 8},
- {"expf", "__svml_expf16", 16},
-
- { "__exp_finite", "__svml_exp2", 2 },
- { "__exp_finite", "__svml_exp4", 4 },
- { "__exp_finite", "__svml_exp8", 8 },
-
- { "__expf_finite", "__svml_expf4", 4 },
- { "__expf_finite", "__svml_expf8", 8 },
- { "__expf_finite", "__svml_expf16", 16 },
-
- {"llvm.exp.f64", "__svml_exp2", 2},
- {"llvm.exp.f64", "__svml_exp4", 4},
- {"llvm.exp.f64", "__svml_exp8", 8},
-
- {"llvm.exp.f32", "__svml_expf4", 4},
- {"llvm.exp.f32", "__svml_expf8", 8},
- {"llvm.exp.f32", "__svml_expf16", 16},
-
- {"log", "__svml_log2", 2},
- {"log", "__svml_log4", 4},
- {"log", "__svml_log8", 8},
-
- {"logf", "__svml_logf4", 4},
- {"logf", "__svml_logf8", 8},
- {"logf", "__svml_logf16", 16},
-
- { "__log_finite", "__svml_log2", 2 },
- { "__log_finite", "__svml_log4", 4 },
- { "__log_finite", "__svml_log8", 8 },
-
- { "__logf_finite", "__svml_logf4", 4 },
- { "__logf_finite", "__svml_logf8", 8 },
- { "__logf_finite", "__svml_logf16", 16 },
-
- {"llvm.log.f64", "__svml_log2", 2},
- {"llvm.log.f64", "__svml_log4", 4},
- {"llvm.log.f64", "__svml_log8", 8},
-
- {"llvm.log.f32", "__svml_logf4", 4},
- {"llvm.log.f32", "__svml_logf8", 8},
- {"llvm.log.f32", "__svml_logf16", 16},
+ #define TLI_DEFINE_SVML_VECFUNCS
+ #include "llvm/Analysis/VecFuncs.def"
};
addVectorizableFunctions(VecFuncs);
break;
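
The table contents are unchanged; they move into VecFuncs.def behind LLVM's usual .def/X-macro pattern. A sketch of the expected file shape, where the entry macro name is an assumption:

    #if defined(TLI_DEFINE_ACCELERATE_VECFUNCS)
    // One entry per scalar->vector mapping: {ScalarFnName, VectorFnName, VF}.
    #define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF},
    TLI_DEFINE_VECFUNC("sinf", "vsinf", 4)
    TLI_DEFINE_VECFUNC("llvm.sin.f32", "vsinf", 4)
    #undef TLI_DEFINE_VECFUNC
    #undef TLI_DEFINE_ACCELERATE_VECFUNCS
    #endif
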
@@ -1589,9 +1565,8 @@ bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const {
if (funcName.empty())
return false;
- std::vector<VecDesc>::const_iterator I = std::lower_bound(
- VectorDescs.begin(), VectorDescs.end(), funcName,
- compareWithScalarFnName);
+ std::vector<VecDesc>::const_iterator I =
+ llvm::lower_bound(VectorDescs, funcName, compareWithScalarFnName);
return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName;
}
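
llvm::lower_bound (from llvm/ADT/STLExtras.h) is a range-taking wrapper around std::lower_bound, so rewrites like this one are behavior-preserving:

    auto Old = std::lower_bound(VectorDescs.begin(), VectorDescs.end(),
                                funcName, compareWithScalarFnName);
    auto New = llvm::lower_bound(VectorDescs, funcName,
                                 compareWithScalarFnName);
    assert(Old == New && "wrapper only drops the begin()/end() noise");
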
@@ -1600,8 +1575,8 @@ StringRef TargetLibraryInfoImpl::getVectorizedFunction(StringRef F,
F = sanitizeFunctionName(F);
if (F.empty())
return F;
- std::vector<VecDesc>::const_iterator I = std::lower_bound(
- VectorDescs.begin(), VectorDescs.end(), F, compareWithScalarFnName);
+ std::vector<VecDesc>::const_iterator I =
+ llvm::lower_bound(VectorDescs, F, compareWithScalarFnName);
while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) {
if (I->VectorizationFactor == VF)
return I->VectorFnName;
@@ -1616,8 +1591,8 @@ StringRef TargetLibraryInfoImpl::getScalarizedFunction(StringRef F,
if (F.empty())
return F;
- std::vector<VecDesc>::const_iterator I = std::lower_bound(
- ScalarDescs.begin(), ScalarDescs.end(), F, compareWithVectorFnName);
+ std::vector<VecDesc>::const_iterator I =
+ llvm::lower_bound(ScalarDescs, F, compareWithVectorFnName);
   if (I == ScalarDescs.end() || StringRef(I->VectorFnName) != F)
return StringRef();
VF = I->VectorizationFactor;
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 9151d46c6cce..eb04c34453fb 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -1,9 +1,8 @@
//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -19,6 +18,8 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LoopIterator.h"
#include <utility>
using namespace llvm;
@@ -41,6 +42,101 @@ struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
};
}
+bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) {
+  // If the loop has irreducible control flow, it cannot be converted to a
+  // hardware loop.
+ LoopBlocksRPO RPOT(L);
+ RPOT.perform(&LI);
+ if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
+ return false;
+ return true;
+}
+
+bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
+ LoopInfo &LI, DominatorTree &DT,
+ bool ForceNestedLoop,
+ bool ForceHardwareLoopPHI) {
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
+ IE = ExitingBlocks.end();
+ I != IE; ++I) {
+ BasicBlock *BB = *I;
+
+ // If we pass the updated counter back through a phi, we need to know
+ // which latch the updated value will be coming from.
+ if (!L->isLoopLatch(BB)) {
+ if (ForceHardwareLoopPHI || CounterInReg)
+ continue;
+ }
+
+ const SCEV *EC = SE.getExitCount(L, BB);
+ if (isa<SCEVCouldNotCompute>(EC))
+ continue;
+ if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
+ if (ConstEC->getValue()->isZero())
+ continue;
+ } else if (!SE.isLoopInvariant(EC, L))
+ continue;
+
+ if (SE.getTypeSizeInBits(EC->getType()) > CountType->getBitWidth())
+ continue;
+
+    // If this exiting block is contained in a nested loop, it is not eligible
+    // for insertion of the branch-and-decrement, since the inner loop would
+    // end up clobbering the hardware loop counter (e.g. the CTR on PowerPC).
+ if (!IsNestingLegal && LI.getLoopFor(BB) != L && !ForceNestedLoop)
+ continue;
+
+ // We now have a loop-invariant count of loop iterations (which is not the
+ // constant zero) for which we know that this loop will not exit via this
+    // exiting block.
+
+    // We need to make sure that this block will run on every loop iteration.
+    // For this to be true, it must dominate all blocks with backedges. Such
+    // blocks are in-loop predecessors of the header block.
+ bool NotAlways = false;
+ for (pred_iterator PI = pred_begin(L->getHeader()),
+ PIE = pred_end(L->getHeader());
+ PI != PIE; ++PI) {
+ if (!L->contains(*PI))
+ continue;
+
+ if (!DT.dominates(*I, *PI)) {
+ NotAlways = true;
+ break;
+ }
+ }
+
+ if (NotAlways)
+ continue;
+
+    // Make sure this block ends with a conditional branch.
+ Instruction *TI = BB->getTerminator();
+ if (!TI)
+ continue;
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (!BI->isConditional())
+ continue;
+
+ ExitBranch = BI;
+ } else
+ continue;
+
+ // Note that this block may not be the loop latch block, even if the loop
+ // has a latch block.
+ ExitBlock = *I;
+ ExitCount = EC;
+ break;
+ }
+
+ if (!ExitBlock)
+ return false;
+ return true;
+}
+
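
A hedged sketch of how a target hook would drive the two checks above; the HardwareLoopInfo(L) constructor form and the surrounding names (Ctx, SE, LI, DT) are assumptions here:

    HardwareLoopInfo HWLoopInfo(L);                // assumed constructor form
    HWLoopInfo.CountType = Type::getInt64Ty(Ctx);  // target-chosen counter width
    if (!HWLoopInfo.canAnalyze(LI) ||
        !HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT,
                                            /*ForceNestedLoop=*/false,
                                            /*ForceHardwareLoopPHI=*/false))
      return false;
    // On success, ExitBlock/ExitBranch identify where the branch-and-decrement
    // goes, and ExitCount is the trip-count SCEV to materialize.
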
TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
: TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}
@@ -61,15 +157,17 @@ int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
return Cost;
}
-int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs) const {
- int Cost = TTIImpl->getCallCost(FTy, NumArgs);
+int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs,
+ const User *U) const {
+ int Cost = TTIImpl->getCallCost(FTy, NumArgs, U);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
int TargetTransformInfo::getCallCost(const Function *F,
- ArrayRef<const Value *> Arguments) const {
- int Cost = TTIImpl->getCallCost(F, Arguments);
+ ArrayRef<const Value *> Arguments,
+ const User *U) const {
+ int Cost = TTIImpl->getCallCost(F, Arguments, U);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -78,6 +176,10 @@ unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
return TTIImpl->getInliningThresholdMultiplier();
}
+int TargetTransformInfo::getInlinerVectorBonusPercent() const {
+ return TTIImpl->getInlinerVectorBonusPercent();
+}
+
int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands) const {
return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
@@ -89,8 +191,9 @@ int TargetTransformInfo::getExtCost(const Instruction *I,
}
int TargetTransformInfo::getIntrinsicCost(
- Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
- int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments);
+ Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments,
+ const User *U) const {
+ int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -128,6 +231,12 @@ bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
return TTIImpl->isLoweredToCall(F);
}
+bool TargetTransformInfo::isHardwareLoopProfitable(
+ Loop *L, ScalarEvolution &SE, AssumptionCache &AC,
+ TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const {
+ return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
+}
+
void TargetTransformInfo::getUnrollingPreferences(
Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
return TTIImpl->getUnrollingPreferences(L, SE, UP);
@@ -159,10 +268,21 @@ bool TargetTransformInfo::canMacroFuseCmp() const {
return TTIImpl->canMacroFuseCmp();
}
+bool TargetTransformInfo::canSaveCmp(Loop *L, BranchInst **BI,
+ ScalarEvolution *SE, LoopInfo *LI,
+ DominatorTree *DT, AssumptionCache *AC,
+ TargetLibraryInfo *LibInfo) const {
+ return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
+}
+
bool TargetTransformInfo::shouldFavorPostInc() const {
return TTIImpl->shouldFavorPostInc();
}
+bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const {
+ return TTIImpl->shouldFavorBackedgeIndex(L);
+}
+
bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
return TTIImpl->isLegalMaskedStore(DataType);
}
@@ -171,6 +291,16 @@ bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const {
return TTIImpl->isLegalMaskedLoad(DataType);
}
+bool TargetTransformInfo::isLegalNTStore(Type *DataType,
+ unsigned Alignment) const {
+ return TTIImpl->isLegalNTStore(DataType, Alignment);
+}
+
+bool TargetTransformInfo::isLegalNTLoad(Type *DataType,
+ unsigned Alignment) const {
+ return TTIImpl->isLegalNTLoad(DataType, Alignment);
+}
+
bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
return TTIImpl->isLegalMaskedGather(DataType);
}
@@ -179,6 +309,14 @@ bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
return TTIImpl->isLegalMaskedScatter(DataType);
}
+bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const {
+ return TTIImpl->isLegalMaskedCompressStore(DataType);
+}
+
+bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
+ return TTIImpl->isLegalMaskedExpandLoad(DataType);
+}
+
bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
return TTIImpl->hasDivRemOp(DataType, IsSigned);
}
@@ -259,9 +397,9 @@ bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) c
return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}
-const TargetTransformInfo::MemCmpExpansionOptions *
-TargetTransformInfo::enableMemCmpExpansion(bool IsZeroCmp) const {
- return TTIImpl->enableMemCmpExpansion(IsZeroCmp);
+TargetTransformInfo::MemCmpExpansionOptions
+TargetTransformInfo::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+ return TTIImpl->enableMemCmpExpansion(OptSize, IsZeroCmp);
}
bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
@@ -570,6 +708,12 @@ int TargetTransformInfo::getAddressComputationCost(Type *Tp,
return Cost;
}
+int TargetTransformInfo::getMemcpyCost(const Instruction *I) const {
+ int Cost = TTIImpl->getMemcpyCost(I);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwiseForm) const {
int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
@@ -688,6 +832,10 @@ bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
return TTIImpl->shouldExpandReduction(II);
}
+unsigned TargetTransformInfo::getGISelRematGlobalCost() const {
+ return TTIImpl->getGISelRematGlobalCost();
+}
+
int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
return TTIImpl->getInstructionLatency(I);
}
@@ -1023,6 +1171,16 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
Op1VP, Op2VP, Operands);
}
+ case Instruction::FNeg: {
+ TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
+ TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
+ Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
+ Op2VK = OK_AnyValue;
+ Op2VP = OP_None;
+ SmallVector<const Value *, 2> Operands(I->operand_values());
+ return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
+ Op1VP, Op2VP, Operands);
+ }
case Instruction::Select: {
const SelectInst *SI = cast<SelectInst>(I);
Type *CondTy = SI->getCondition()->getType();
diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp
index 4dec53151ed6..879c7172d038 100644
--- a/lib/Analysis/Trace.cpp
+++ b/lib/Analysis/Trace.cpp
@@ -1,9 +1,8 @@
//===- Trace.cpp - Implementation of Trace class --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 83974da30a54..3b9040aa0f52 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -1,9 +1,8 @@
//===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -368,26 +367,28 @@ static bool isStructPathTBAA(const MDNode *MD) {
}
AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+ const MemoryLocation &LocB,
+ AAQueryInfo &AAQI) {
if (!EnableTBAA)
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
// If accesses may alias, chain to the next AliasAnalysis.
if (Aliases(LocA.AATags.TBAA, LocB.AATags.TBAA))
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
// Otherwise return a definitive result.
return NoAlias;
}
bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
+ AAQueryInfo &AAQI,
bool OrLocal) {
if (!EnableTBAA)
- return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
const MDNode *M = Loc.AATags.TBAA;
if (!M)
- return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
// If this is an "immutable" type, we can assume the pointer is pointing
// to constant memory.
@@ -395,7 +396,7 @@ bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
(isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable()))
return true;
- return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
}
FunctionModRefBehavior
@@ -421,29 +422,31 @@ FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) {
}
ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call,
- const MemoryLocation &Loc) {
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
if (!EnableTBAA)
- return AAResultBase::getModRefInfo(Call, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc, AAQI);
if (const MDNode *L = Loc.AATags.TBAA)
if (const MDNode *M = Call->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(L, M))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(Call, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc, AAQI);
}
ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call1,
- const CallBase *Call2) {
+ const CallBase *Call2,
+ AAQueryInfo &AAQI) {
if (!EnableTBAA)
- return AAResultBase::getModRefInfo(Call1, Call2);
+ return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
if (const MDNode *M1 = Call1->getMetadata(LLVMContext::MD_tbaa))
if (const MDNode *M2 = Call2->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(M1, M2))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(Call1, Call2);
+ return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
}
bool MDNode::isTBAAVtableAccess() const {
diff --git a/lib/Analysis/TypeMetadataUtils.cpp b/lib/Analysis/TypeMetadataUtils.cpp
index bd13a43b8d46..9311dfbc6eba 100644
--- a/lib/Analysis/TypeMetadataUtils.cpp
+++ b/lib/Analysis/TypeMetadataUtils.cpp
@@ -1,9 +1,8 @@
//===- TypeMetadataUtils.cpp - Utilities related to type metadata ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/ValueLattice.cpp b/lib/Analysis/ValueLattice.cpp
index 7de437ca480e..a0115a0eec36 100644
--- a/lib/Analysis/ValueLattice.cpp
+++ b/lib/Analysis/ValueLattice.cpp
@@ -1,9 +1,8 @@
//===- ValueLattice.cpp - Value constraint analysis -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/ValueLatticeUtils.cpp b/lib/Analysis/ValueLatticeUtils.cpp
index 22c9de4fe94d..3f9287e26ce7 100644
--- a/lib/Analysis/ValueLatticeUtils.cpp
+++ b/lib/Analysis/ValueLatticeUtils.cpp
@@ -1,9 +1,8 @@
//===-- ValueLatticeUtils.cpp - Utils for solving lattices ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 0446426c0e66..c70906dcc629 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -1,9 +1,8 @@
//===- ValueTracking.cpp - Walk computations to compute properties --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -39,7 +38,6 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
@@ -617,237 +615,242 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
if (Depth == MaxDepth)
continue;
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
+ if (!Cmp)
+ continue;
+
Value *A, *B;
- auto m_V = m_CombineOr(m_Specific(V),
- m_CombineOr(m_PtrToInt(m_Specific(V)),
- m_BitCast(m_Specific(V))));
+ auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
CmpInst::Predicate Pred;
uint64_t C;
- // assume(v = a)
- if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- Known.Zero |= RHSKnown.Zero;
- Known.One |= RHSKnown.One;
- // assume(v & b = a)
- } else if (match(Arg,
- m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- KnownBits MaskKnown(BitWidth);
- computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));
-
- // For those bits in the mask that are known to be one, we can propagate
- // known bits from the RHS to V.
- Known.Zero |= RHSKnown.Zero & MaskKnown.One;
- Known.One |= RHSKnown.One & MaskKnown.One;
- // assume(~(v & b) = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- KnownBits MaskKnown(BitWidth);
- computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));
-
- // For those bits in the mask that are known to be one, we can propagate
- // inverted known bits from the RHS to V.
- Known.Zero |= RHSKnown.One & MaskKnown.One;
- Known.One |= RHSKnown.Zero & MaskKnown.One;
- // assume(v | b = a)
- } else if (match(Arg,
- m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- KnownBits BKnown(BitWidth);
- computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
-
- // For those bits in B that are known to be zero, we can propagate known
- // bits from the RHS to V.
- Known.Zero |= RHSKnown.Zero & BKnown.Zero;
- Known.One |= RHSKnown.One & BKnown.Zero;
- // assume(~(v | b) = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- KnownBits BKnown(BitWidth);
- computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
-
- // For those bits in B that are known to be zero, we can propagate
- // inverted known bits from the RHS to V.
- Known.Zero |= RHSKnown.One & BKnown.Zero;
- Known.One |= RHSKnown.Zero & BKnown.Zero;
- // assume(v ^ b = a)
- } else if (match(Arg,
- m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- KnownBits BKnown(BitWidth);
- computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
-
- // For those bits in B that are known to be zero, we can propagate known
- // bits from the RHS to V. For those bits in B that are known to be one,
- // we can propagate inverted known bits from the RHS to V.
- Known.Zero |= RHSKnown.Zero & BKnown.Zero;
- Known.One |= RHSKnown.One & BKnown.Zero;
- Known.Zero |= RHSKnown.One & BKnown.One;
- Known.One |= RHSKnown.Zero & BKnown.One;
- // assume(~(v ^ b) = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- KnownBits BKnown(BitWidth);
- computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
-
- // For those bits in B that are known to be zero, we can propagate
- // inverted known bits from the RHS to V. For those bits in B that are
- // known to be one, we can propagate known bits from the RHS to V.
- Known.Zero |= RHSKnown.One & BKnown.Zero;
- Known.One |= RHSKnown.Zero & BKnown.Zero;
- Known.Zero |= RHSKnown.Zero & BKnown.One;
- Known.One |= RHSKnown.One & BKnown.One;
- // assume(v << c = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT) &&
- C < BitWidth) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- // For those bits in RHS that are known, we can propagate them to known
- // bits in V shifted to the right by C.
- RHSKnown.Zero.lshrInPlace(C);
- Known.Zero |= RHSKnown.Zero;
- RHSKnown.One.lshrInPlace(C);
- Known.One |= RHSKnown.One;
- // assume(~(v << c) = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT) &&
- C < BitWidth) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- // For those bits in RHS that are known, we can propagate them inverted
- // to known bits in V shifted to the right by C.
- RHSKnown.One.lshrInPlace(C);
- Known.Zero |= RHSKnown.One;
- RHSKnown.Zero.lshrInPlace(C);
- Known.One |= RHSKnown.Zero;
- // assume(v >> c = a)
- } else if (match(Arg,
- m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT) &&
- C < BitWidth) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- // For those bits in RHS that are known, we can propagate them to known
- // bits in V shifted to the right by C.
- Known.Zero |= RHSKnown.Zero << C;
- Known.One |= RHSKnown.One << C;
- // assume(~(v >> c) = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT) &&
- C < BitWidth) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- // For those bits in RHS that are known, we can propagate them inverted
- // to known bits in V shifted to the right by C.
- Known.Zero |= RHSKnown.One << C;
- Known.One |= RHSKnown.Zero << C;
- // assume(v >=_s c) where c is non-negative
- } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_SGE &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-
- if (RHSKnown.isNonNegative()) {
- // We know that the sign bit is zero.
- Known.makeNonNegative();
+ switch (Cmp->getPredicate()) {
+ default:
+ break;
+ case ICmpInst::ICMP_EQ:
+ // assume(v = a)
+ if (match(Cmp, m_c_ICmp(Pred, m_V, m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ Known.Zero |= RHSKnown.Zero;
+ Known.One |= RHSKnown.One;
+ // assume(v & b = a)
+ } else if (match(Cmp,
+ m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits MaskKnown(BitWidth);
+ computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));
+
+ // For those bits in the mask that are known to be one, we can propagate
+ // known bits from the RHS to V.
+ Known.Zero |= RHSKnown.Zero & MaskKnown.One;
+ Known.One |= RHSKnown.One & MaskKnown.One;
+ // assume(~(v & b) = a)
+ } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
+ m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits MaskKnown(BitWidth);
+ computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));
+
+ // For those bits in the mask that are known to be one, we can propagate
+ // inverted known bits from the RHS to V.
+ Known.Zero |= RHSKnown.One & MaskKnown.One;
+ Known.One |= RHSKnown.Zero & MaskKnown.One;
+ // assume(v | b = a)
+ } else if (match(Cmp,
+ m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits BKnown(BitWidth);
+ computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
+
+ // For those bits in B that are known to be zero, we can propagate known
+ // bits from the RHS to V.
+ Known.Zero |= RHSKnown.Zero & BKnown.Zero;
+ Known.One |= RHSKnown.One & BKnown.Zero;
+ // assume(~(v | b) = a)
+ } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
+ m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits BKnown(BitWidth);
+ computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
+
+ // For those bits in B that are known to be zero, we can propagate
+ // inverted known bits from the RHS to V.
+ Known.Zero |= RHSKnown.One & BKnown.Zero;
+ Known.One |= RHSKnown.Zero & BKnown.Zero;
+ // assume(v ^ b = a)
+ } else if (match(Cmp,
+ m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits BKnown(BitWidth);
+ computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
+
+ // For those bits in B that are known to be zero, we can propagate known
+ // bits from the RHS to V. For those bits in B that are known to be one,
+ // we can propagate inverted known bits from the RHS to V.
+ Known.Zero |= RHSKnown.Zero & BKnown.Zero;
+ Known.One |= RHSKnown.One & BKnown.Zero;
+ Known.Zero |= RHSKnown.One & BKnown.One;
+ Known.One |= RHSKnown.Zero & BKnown.One;
+ // assume(~(v ^ b) = a)
+ } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
+ m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits BKnown(BitWidth);
+ computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
+
+ // For those bits in B that are known to be zero, we can propagate
+ // inverted known bits from the RHS to V. For those bits in B that are
+ // known to be one, we can propagate known bits from the RHS to V.
+ Known.Zero |= RHSKnown.One & BKnown.Zero;
+ Known.One |= RHSKnown.Zero & BKnown.Zero;
+ Known.Zero |= RHSKnown.Zero & BKnown.One;
+ Known.One |= RHSKnown.One & BKnown.One;
+ // assume(v << c = a)
+ } else if (match(Cmp, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
+ m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ // For those bits in RHS that are known, we can propagate them to known
+ // bits in V shifted to the right by C.
+ RHSKnown.Zero.lshrInPlace(C);
+ Known.Zero |= RHSKnown.Zero;
+ RHSKnown.One.lshrInPlace(C);
+ Known.One |= RHSKnown.One;
+ // assume(~(v << c) = a)
+ } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
+ m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ // For those bits in RHS that are known, we can propagate them inverted
+ // to known bits in V shifted to the right by C.
+ RHSKnown.One.lshrInPlace(C);
+ Known.Zero |= RHSKnown.One;
+ RHSKnown.Zero.lshrInPlace(C);
+ Known.One |= RHSKnown.Zero;
+ // assume(v >> c = a)
+ } else if (match(Cmp, m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)),
+ m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ // For those bits in RHS that are known, we can propagate them to known
+ // bits in V shifted to the right by C.
+ Known.Zero |= RHSKnown.Zero << C;
+ Known.One |= RHSKnown.One << C;
+ // assume(~(v >> c) = a)
+ } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))),
+ m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ // For those bits in RHS that are known, we can propagate them inverted
+ // to known bits in V shifted to the right by C.
+ Known.Zero |= RHSKnown.One << C;
+ Known.One |= RHSKnown.Zero << C;
}
- // assume(v >_s c) where c is at least -1.
- } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_SGT &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-
- if (RHSKnown.isAllOnes() || RHSKnown.isNonNegative()) {
- // We know that the sign bit is zero.
- Known.makeNonNegative();
+ break;
+ case ICmpInst::ICMP_SGE:
+ // assume(v >=_s c) where c is non-negative
+ if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth + 1, Query(Q, I));
+
+ if (RHSKnown.isNonNegative()) {
+ // We know that the sign bit is zero.
+ Known.makeNonNegative();
+ }
}
- // assume(v <=_s c) where c is negative
- } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_SLE &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-
- if (RHSKnown.isNegative()) {
- // We know that the sign bit is one.
- Known.makeNegative();
+ break;
+ case ICmpInst::ICMP_SGT:
+ // assume(v >_s c) where c is at least -1.
+ if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth + 1, Query(Q, I));
+
+ if (RHSKnown.isAllOnes() || RHSKnown.isNonNegative()) {
+ // We know that the sign bit is zero.
+ Known.makeNonNegative();
+ }
}
- // assume(v <_s c) where c is non-positive
- } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_SLT &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-
- if (RHSKnown.isZero() || RHSKnown.isNegative()) {
- // We know that the sign bit is one.
- Known.makeNegative();
+ break;
+ case ICmpInst::ICMP_SLE:
+ // assume(v <=_s c) where c is negative
+ if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth + 1, Query(Q, I));
+
+ if (RHSKnown.isNegative()) {
+ // We know that the sign bit is one.
+ Known.makeNegative();
+ }
}
- // assume(v <=_u c)
- } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_ULE &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-
- // Whatever high bits in c are zero are known to be zero.
- Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
- // assume(v <_u c)
- } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_ULT &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-
- // If the RHS is known zero, then this assumption must be wrong (nothing
- // is unsigned less than zero). Signal a conflict and get out of here.
- if (RHSKnown.isZero()) {
- Known.Zero.setAllBits();
- Known.One.setAllBits();
- break;
+ break;
+ case ICmpInst::ICMP_SLT:
+ // assume(v <_s c) where c is non-positive
+ if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+
+ if (RHSKnown.isZero() || RHSKnown.isNegative()) {
+ // We know that the sign bit is one.
+ Known.makeNegative();
+ }
}
-
- // Whatever high bits in c are zero are known to be zero (if c is a power
- // of 2, then one more).
- if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I)))
- Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1);
- else
+ break;
+ case ICmpInst::ICMP_ULE:
+ // assume(v <=_u c)
+ if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+
+ // Whatever high bits in c are zero are known to be zero.
Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ // assume(v <_u c)
+ if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+
+ // If the RHS is known zero, then this assumption must be wrong (nothing
+ // is unsigned less than zero). Signal a conflict and get out of here.
+ if (RHSKnown.isZero()) {
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ break;
+ }
+
+ // Whatever high bits in c are zero are known to be zero (if c is a power
+ // of 2, then one more).
+ if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I)))
+ Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1);
+ else
+ Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
+ }
+ break;
}
}
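
A worked instance of the ICMP_EQ masked case: given assume((v & 0x0F) == 5) with both constants fully known, the propagation pins v's low nibble while the masked-off high nibble stays unknown (values built by hand over the KnownBits API):

    KnownBits RHSKnown(8);                  // a == 5, all 8 bits known
    RHSKnown.One  = APInt(8, 0x05);
    RHSKnown.Zero = APInt(8, 0xFA);
    KnownBits MaskKnown(8);                 // b == 0x0F, all 8 bits known
    MaskKnown.One  = APInt(8, 0x0F);
    MaskKnown.Zero = APInt(8, 0xF0);
    KnownBits Known(8);                     // nothing known about v yet
    Known.Zero |= RHSKnown.Zero & MaskKnown.One;  // Known.Zero == 0x0A
    Known.One  |= RHSKnown.One  & MaskKnown.One;  // Known.One  == 0x05
    // v's low nibble is now pinned to 0b0101; its high nibble is untouched
    // because the mask clears it before the compare.
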
@@ -1129,12 +1132,9 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
Q.DL.getTypeSizeInBits(ScalarTy);
assert(SrcBitWidth && "SrcBitWidth can't be zero");
- Known = Known.zextOrTrunc(SrcBitWidth);
+ Known = Known.zextOrTrunc(SrcBitWidth, false);
computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
- Known = Known.zextOrTrunc(BitWidth);
- // Any top bits are known to be zero.
- if (BitWidth > SrcBitWidth)
- Known.Zero.setBitsFrom(SrcBitWidth);
+ Known = Known.zextOrTrunc(BitWidth, true /* ExtendedBitsAreKnownZero */);
break;
}
case Instruction::BitCast: {
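
A worked instance of the new two-argument zextOrTrunc on the extend path:

    KnownBits Known(8);
    Known.Zero.setHighBits(4);  // bits 4..7 known zero
    Known = Known.zextOrTrunc(16, /*ExtendedBitsAreKnownZero=*/true);
    // Bits 8..15 are now known zero as well -- exactly what the removed
    // Known.Zero.setBitsFrom(SrcBitWidth) used to do by hand.
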
@@ -1527,6 +1527,37 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);
break;
}
+ case Intrinsic::uadd_sat:
+ case Intrinsic::usub_sat: {
+ bool IsAdd = II->getIntrinsicID() == Intrinsic::uadd_sat;
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+
+ // Add: Leading ones of either operand are preserved.
+ // Sub: Leading zeros of LHS and leading ones of RHS are preserved
+ // as leading zeros in the result.
+ unsigned LeadingKnown;
+ if (IsAdd)
+ LeadingKnown = std::max(Known.countMinLeadingOnes(),
+ Known2.countMinLeadingOnes());
+ else
+ LeadingKnown = std::max(Known.countMinLeadingZeros(),
+ Known2.countMinLeadingOnes());
+
+ Known = KnownBits::computeForAddSub(
+ IsAdd, /* NSW */ false, Known, Known2);
+
+    // Saturation clamps to all-ones (add) or zero (sub). Known ones survive
+    // clamping to all-ones and known zeros survive clamping to zero, so keep
+    // that set (plus the leading bits above) and drop the opposite set,
+    // which saturation may flip.
+ if (IsAdd) {
+ Known.One.setHighBits(LeadingKnown);
+ Known.Zero.clearAllBits();
+ } else {
+ Known.Zero.setHighBits(LeadingKnown);
+ Known.One.clearAllBits();
+ }
+ break;
+ }
case Intrinsic::x86_sse42_crc32_64_64:
Known.Zero.setBitsFrom(32);
break;
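
A concrete check of the "leading ones are preserved" claim for add, written as an i8 host analogue of llvm.uadd.sat:

    #include <cstdint>
    uint8_t uadd_sat(uint8_t x, uint8_t y) {
      unsigned s = unsigned(x) + unsigned(y);
      return s > 0xFF ? 0xFF : uint8_t(s);
    }
    // If x >= 0x80, either the add wraps and we return 0xFF (top bit one), or
    // it doesn't and the result is >= x >= 0x80 (top bit one). Either way the
    // leading one survives, which is what the case above encodes.
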
@@ -1967,6 +1998,15 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
// Must be non-zero due to null test above.
return true;
+ if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+ // See the comment for IntToPtr/PtrToInt instructions below.
+ if (CE->getOpcode() == Instruction::IntToPtr ||
+ CE->getOpcode() == Instruction::PtrToInt)
+ if (Q.DL.getTypeSizeInBits(CE->getOperand(0)->getType()) <=
+ Q.DL.getTypeSizeInBits(CE->getType()))
+ return isKnownNonZero(CE->getOperand(0), Depth, Q);
+ }
+
// For constant vectors, check that all elements are undefined or known
// non-zero to determine that the whole vector is known non-zero.
if (auto *VecTy = dyn_cast<VectorType>(C->getType())) {
@@ -2037,11 +2077,33 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
if (isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT))
return true;
+ // Look through bitcast operations, GEPs, and int2ptr instructions as they
+ // do not alter the value, or at least not the nullness property of the
+ // value, e.g., int2ptr is allowed to zero/sign extend the value.
+ //
+ // Note that we have to take special care to avoid looking through
+ // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well
+ // as casts that can alter the value, e.g., AddrSpaceCasts.
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V))
if (isGEPKnownNonNull(GEP, Depth, Q))
return true;
+
+ if (auto *BCO = dyn_cast<BitCastOperator>(V))
+ return isKnownNonZero(BCO->getOperand(0), Depth, Q);
+
+ if (auto *I2P = dyn_cast<IntToPtrInst>(V))
+ if (Q.DL.getTypeSizeInBits(I2P->getSrcTy()) <=
+ Q.DL.getTypeSizeInBits(I2P->getDestTy()))
+ return isKnownNonZero(I2P->getOperand(0), Depth, Q);
}
+ // Similar to int2ptr above, we can look through ptr2int here if the cast
+ // is a no-op or an extend and not a truncate.
+ if (auto *P2I = dyn_cast<PtrToIntInst>(V))
+ if (Q.DL.getTypeSizeInBits(P2I->getSrcTy()) <=
+ Q.DL.getTypeSizeInBits(P2I->getDestTy()))
+ return isKnownNonZero(P2I->getOperand(0), Depth, Q);
+
unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), Q.DL);
// X | Y != 0 if X != 0 or Y != 0.
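
Why the size comparison guards each of these look-throughs: a truncating cast can destroy nonzero-ness, while a widening or same-width cast cannot.

    uint64_t Wide = 0x100000000ULL;    // known non-zero
    uint32_t Narrow = (uint32_t)Wide;  // == 0: nonzero-ness lost by truncation
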
@@ -3082,6 +3144,11 @@ bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
case Intrinsic::sqrt:
return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) &&
CannotBeOrderedLessThanZero(II->getArgOperand(0), TLI);
+ case Intrinsic::minnum:
+ case Intrinsic::maxnum:
+ // If either operand is not NaN, the result is not NaN.
+ return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) ||
+ isKnownNeverNaN(II->getArgOperand(1), TLI, Depth + 1);
default:
return false;
}
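
llvm.minnum/llvm.maxnum follow the IEEE-754 minNum/maxNum (libm fmin/fmax) convention of ignoring a quiet NaN operand, which is why one NaN-free operand suffices:

    #include <cassert>
    #include <cmath>
    int main() {
      assert(std::fmin(NAN, 1.0) == 1.0);  // NaN is dropped, not propagated
      assert(std::fmax(1.0, NAN) == 1.0);
      return 0;
    }
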
@@ -3107,7 +3174,7 @@ bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
return true;
}
-Value *llvm::isBytewiseValue(Value *V) {
+Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) {
// All byte-wide stores are splatable, even of arbitrary variables.
if (V->getType()->isIntegerTy(8))
@@ -3120,6 +3187,10 @@ Value *llvm::isBytewiseValue(Value *V) {
if (isa<UndefValue>(V))
return UndefInt8;
+ const uint64_t Size = DL.getTypeStoreSize(V->getType());
+ if (!Size)
+ return UndefInt8;
+
Constant *C = dyn_cast<Constant>(V);
if (!C) {
// Conceptually, we could handle things like:
@@ -3146,7 +3217,8 @@ Value *llvm::isBytewiseValue(Value *V) {
else if (CFP->getType()->isDoubleTy())
Ty = Type::getInt64Ty(Ctx);
// Don't handle long double formats, which have strange constraints.
- return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty)) : nullptr;
+ return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty), DL)
+ : nullptr;
}
// We can handle constant integers that are multiple of 8 bits.
@@ -3159,6 +3231,17 @@ Value *llvm::isBytewiseValue(Value *V) {
}
}
+ if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+ if (CE->getOpcode() == Instruction::IntToPtr) {
+ auto PS = DL.getPointerSizeInBits(
+ cast<PointerType>(CE->getType())->getAddressSpace());
+ return isBytewiseValue(
+ ConstantExpr::getIntegerCast(CE->getOperand(0),
+ Type::getIntNTy(Ctx, PS), false),
+ DL);
+ }
+ }
+
auto Merge = [&](Value *LHS, Value *RHS) -> Value * {
if (LHS == RHS)
return LHS;
@@ -3174,20 +3257,15 @@ Value *llvm::isBytewiseValue(Value *V) {
if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) {
Value *Val = UndefInt8;
for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I)
- if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I)))))
+ if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I), DL))))
return nullptr;
return Val;
}
- if (isa<ConstantVector>(C)) {
- Constant *Splat = cast<ConstantVector>(C)->getSplatValue();
- return Splat ? isBytewiseValue(Splat) : nullptr;
- }
-
- if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
+ if (isa<ConstantAggregate>(C)) {
Value *Val = UndefInt8;
for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I)
- if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I)))))
+ if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I), DL))))
return nullptr;
return Val;
}
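
isBytewiseValue asks whether a store of the value could instead be a memset; the new IntToPtr case re-asks that question of the integer operand widened to pointer width. A host-side analogue of the integer splat test (the IR version works on APInt):

    #include <cstdint>
    #include <optional>
    std::optional<uint8_t> bytewiseSplat(uint64_t V, unsigned Bytes) {
      uint8_t B = V & 0xFF;
      for (unsigned I = 1; I != Bytes; ++I)
        if (((V >> (8 * I)) & 0xFF) != B)
          return std::nullopt;  // bytes differ: not memset-able
      return B;                 // e.g. 0xAAAAAAAA -> 0xAA; 0x01020304 -> none
    }
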
@@ -3363,57 +3441,6 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
return nullptr;
}
-/// Analyze the specified pointer to see if it can be expressed as a base
-/// pointer plus a constant offset. Return the base and offset to the caller.
-Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
- const DataLayout &DL) {
- unsigned BitWidth = DL.getIndexTypeSizeInBits(Ptr->getType());
- APInt ByteOffset(BitWidth, 0);
-
- // We walk up the defs but use a visited set to handle unreachable code. In
- // that case, we stop after accumulating the cycle once (not that it
- // matters).
- SmallPtrSet<Value *, 16> Visited;
- while (Visited.insert(Ptr).second) {
- if (Ptr->getType()->isVectorTy())
- break;
-
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
- // If one of the values we have visited is an addrspacecast, then
- // the pointer type of this GEP may be different from the type
- // of the Ptr parameter which was passed to this function. This
- // means when we construct GEPOffset, we need to use the size
- // of GEP's pointer type rather than the size of the original
- // pointer type.
- APInt GEPOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
- if (!GEP->accumulateConstantOffset(DL, GEPOffset))
- break;
-
- APInt OrigByteOffset(ByteOffset);
- ByteOffset += GEPOffset.sextOrTrunc(ByteOffset.getBitWidth());
- if (ByteOffset.getMinSignedBits() > 64) {
- // Stop traversal if the pointer offset wouldn't fit into int64_t
- // (this should be removed if Offset is updated to an APInt)
- ByteOffset = OrigByteOffset;
- break;
- }
-
- Ptr = GEP->getPointerOperand();
- } else if (Operator::getOpcode(Ptr) == Instruction::BitCast ||
- Operator::getOpcode(Ptr) == Instruction::AddrSpaceCast) {
- Ptr = cast<Operator>(Ptr)->getOperand(0);
- } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
- if (GA->isInterposable())
- break;
- Ptr = GA->getAliasee();
- } else {
- break;
- }
- }
- Offset = ByteOffset.getSExtValue();
- return Ptr;
-}
-
bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
unsigned CharSize) {
// Make sure the GEP has exactly three arguments.
@@ -3638,7 +3665,9 @@ const Value *llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call) {
bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
const CallBase *Call) {
return Call->getIntrinsicID() == Intrinsic::launder_invariant_group ||
- Call->getIntrinsicID() == Intrinsic::strip_invariant_group;
+ Call->getIntrinsicID() == Intrinsic::strip_invariant_group ||
+ Call->getIntrinsicID() == Intrinsic::aarch64_irg ||
+ Call->getIntrinsicID() == Intrinsic::aarch64_tagp;
}
/// \p PN defines a loop-variant pointer to an object. Check if the
@@ -3717,26 +3746,27 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
return V;
}
-void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects,
+void llvm::GetUnderlyingObjects(const Value *V,
+ SmallVectorImpl<const Value *> &Objects,
const DataLayout &DL, LoopInfo *LI,
unsigned MaxLookup) {
- SmallPtrSet<Value *, 4> Visited;
- SmallVector<Value *, 4> Worklist;
+ SmallPtrSet<const Value *, 4> Visited;
+ SmallVector<const Value *, 4> Worklist;
Worklist.push_back(V);
do {
- Value *P = Worklist.pop_back_val();
+ const Value *P = Worklist.pop_back_val();
P = GetUnderlyingObject(P, DL, MaxLookup);
if (!Visited.insert(P).second)
continue;
- if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
+ if (auto *SI = dyn_cast<SelectInst>(P)) {
Worklist.push_back(SI->getTrueValue());
Worklist.push_back(SI->getFalseValue());
continue;
}
- if (PHINode *PN = dyn_cast<PHINode>(P)) {
+ if (auto *PN = dyn_cast<PHINode>(P)) {
// If this PHI changes the underlying object in every iteration of the
// loop, don't look through it. Consider:
// int **A;
@@ -3797,10 +3827,10 @@ bool llvm::getUnderlyingObjectsForCodeGen(const Value *V,
do {
V = Working.pop_back_val();
- SmallVector<Value *, 4> Objs;
- GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);
+ SmallVector<const Value *, 4> Objs;
+ GetUnderlyingObjects(V, Objs, DL);
- for (Value *V : Objs) {
+ for (const Value *V : Objs) {
if (!Visited.insert(V).second)
continue;
if (Operator::getOpcode(V) == Instruction::IntToPtr) {
@@ -3888,7 +3918,8 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
return false;
const DataLayout &DL = LI->getModule()->getDataLayout();
return isDereferenceableAndAlignedPointer(LI->getPointerOperand(),
- LI->getAlignment(), DL, CtxI, DT);
+ LI->getType(), LI->getAlignment(),
+ DL, CtxI, DT);
}
case Instruction::Call: {
auto *CI = cast<const CallInst>(Inst);
@@ -3901,6 +3932,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
case Instruction::VAArg:
case Instruction::Alloca:
case Instruction::Invoke:
+ case Instruction::CallBr:
case Instruction::PHI:
case Instruction::Store:
case Instruction::Ret:
@@ -3926,51 +3958,46 @@ bool llvm::mayBeMemoryDependent(const Instruction &I) {
return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I);
}
+/// Convert ConstantRange OverflowResult into ValueTracking OverflowResult.
+static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
+ switch (OR) {
+ case ConstantRange::OverflowResult::MayOverflow:
+ return OverflowResult::MayOverflow;
+ case ConstantRange::OverflowResult::AlwaysOverflowsLow:
+ return OverflowResult::AlwaysOverflowsLow;
+ case ConstantRange::OverflowResult::AlwaysOverflowsHigh:
+ return OverflowResult::AlwaysOverflowsHigh;
+ case ConstantRange::OverflowResult::NeverOverflows:
+ return OverflowResult::NeverOverflows;
+ }
+ llvm_unreachable("Unknown OverflowResult");
+}
+
+/// Combine constant ranges from computeConstantRange() and computeKnownBits().
+static ConstantRange computeConstantRangeIncludingKnownBits(
+ const Value *V, bool ForSigned, const DataLayout &DL, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
+ OptimizationRemarkEmitter *ORE = nullptr, bool UseInstrInfo = true) {
+ KnownBits Known = computeKnownBits(
+ V, DL, Depth, AC, CxtI, DT, ORE, UseInstrInfo);
+ ConstantRange CR1 = ConstantRange::fromKnownBits(Known, ForSigned);
+ ConstantRange CR2 = computeConstantRange(V, UseInstrInfo);
+ ConstantRange::PreferredRangeType RangeType =
+ ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned;
+ return CR1.intersectWith(CR2, RangeType);
+}
+
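
A minimal standalone sketch of the combination above (illustrative only; the i8 width and both ranges are invented for the example):

#include "llvm/IR/ConstantRange.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// Top two bits known zero give the unsigned range [0, 64); intersecting with
// a hypothetical flag-derived range [10, 100) tightens the result to [10, 64).
ConstantRange combinedRangeSketch() {
  KnownBits Known(8);
  Known.Zero = APInt(8, 0xC0);
  ConstantRange FromBits =
      ConstantRange::fromKnownBits(Known, /*IsSigned=*/false);
  ConstantRange FromFlags(APInt(8, 10), APInt(8, 100));
  return FromBits.intersectWith(FromFlags, ConstantRange::Unsigned);
}
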
OverflowResult llvm::computeOverflowForUnsignedMul(
const Value *LHS, const Value *RHS, const DataLayout &DL,
AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
bool UseInstrInfo) {
- // Multiplying n * m significant bits yields a result of n + m significant
- // bits. If the total number of significant bits does not exceed the
- // result bit width (minus 1), there is no overflow.
- // This means if we have enough leading zero bits in the operands
- // we can guarantee that the result does not overflow.
- // Ref: "Hacker's Delight" by Henry Warren
- unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
- KnownBits LHSKnown(BitWidth);
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, nullptr,
- UseInstrInfo);
- computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, nullptr,
- UseInstrInfo);
- // Note that underestimating the number of zero bits gives a more
- // conservative answer.
- unsigned ZeroBits = LHSKnown.countMinLeadingZeros() +
- RHSKnown.countMinLeadingZeros();
- // First handle the easy case: if we have enough zero bits there's
- // definitely no overflow.
- if (ZeroBits >= BitWidth)
- return OverflowResult::NeverOverflows;
-
- // Get the largest possible values for each operand.
- APInt LHSMax = ~LHSKnown.Zero;
- APInt RHSMax = ~RHSKnown.Zero;
-
- // We know the multiply operation doesn't overflow if the maximum values for
- // each operand will not overflow after we multiply them together.
- bool MaxOverflow;
- (void)LHSMax.umul_ov(RHSMax, MaxOverflow);
- if (!MaxOverflow)
- return OverflowResult::NeverOverflows;
-
- // We know it always overflows if multiplying the smallest possible values for
- // the operands also results in overflow.
- bool MinOverflow;
- (void)LHSKnown.One.umul_ov(RHSKnown.One, MinOverflow);
- if (MinOverflow)
- return OverflowResult::AlwaysOverflows;
-
- return OverflowResult::MayOverflow;
+ KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
+ nullptr, UseInstrInfo);
+ KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
+ nullptr, UseInstrInfo);
+ ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false);
+ ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false);
+ return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange));
}
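
The deleted leading-zero argument falls out of the range arithmetic. A hedged sketch of the same conclusion via unsignedMulMayOverflow (i8 bounds chosen for illustration):

#include "llvm/IR/ConstantRange.h"
using namespace llvm;

// Each operand in [0, 16) has at least four leading zeros, so the product is
// at most 15 * 15 = 225 < 256 and the i8 multiply can never wrap.
bool mulNeverWrapsSketch() {
  ConstantRange L(APInt(8, 0), APInt(8, 16));
  ConstantRange R(APInt(8, 0), APInt(8, 16));
  return L.unsignedMulMayOverflow(R) ==
         ConstantRange::OverflowResult::NeverOverflows;
}
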
OverflowResult
@@ -4020,69 +4047,13 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(
const Value *LHS, const Value *RHS, const DataLayout &DL,
AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
bool UseInstrInfo) {
- KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
- nullptr, UseInstrInfo);
- if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) {
- KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
- nullptr, UseInstrInfo);
-
- if (LHSKnown.isNegative() && RHSKnown.isNegative()) {
- // The sign bit is set in both cases: this MUST overflow.
- return OverflowResult::AlwaysOverflows;
- }
-
- if (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()) {
- // The sign bit is clear in both cases: this CANNOT overflow.
- return OverflowResult::NeverOverflows;
- }
- }
-
- return OverflowResult::MayOverflow;
-}
-
-/// Return true if we can prove that adding the two values of the
-/// knownbits will not overflow.
-/// Otherwise return false.
-static bool checkRippleForSignedAdd(const KnownBits &LHSKnown,
- const KnownBits &RHSKnown) {
- // Addition of two 2's complement numbers having opposite signs will never
- // overflow.
- if ((LHSKnown.isNegative() && RHSKnown.isNonNegative()) ||
- (LHSKnown.isNonNegative() && RHSKnown.isNegative()))
- return true;
-
- // If either of the values is known to be non-negative, adding them can only
- // overflow if the second is also non-negative, so we can assume that.
- // Two non-negative numbers will only overflow if there is a carry to the
- // sign bit, so we can check if even when the values are as big as possible
- // there is no overflow to the sign bit.
- if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) {
- APInt MaxLHS = ~LHSKnown.Zero;
- MaxLHS.clearSignBit();
- APInt MaxRHS = ~RHSKnown.Zero;
- MaxRHS.clearSignBit();
- APInt Result = std::move(MaxLHS) + std::move(MaxRHS);
- return Result.isSignBitClear();
- }
-
- // If either of the values is known to be negative, adding them can only
- // overflow if the second is also negative, so we can assume that.
- // Two negative number will only overflow if there is no carry to the sign
- // bit, so we can check if even when the values are as small as possible
- // there is overflow to the sign bit.
- if (LHSKnown.isNegative() || RHSKnown.isNegative()) {
- APInt MinLHS = LHSKnown.One;
- MinLHS.clearSignBit();
- APInt MinRHS = RHSKnown.One;
- MinRHS.clearSignBit();
- APInt Result = std::move(MinLHS) + std::move(MinRHS);
- return Result.isSignBitSet();
- }
-
- // If we reached here it means that we know nothing about the sign bits.
- // In this case we can't know if there will be an overflow, since by
- // changing the sign bits any two values can be made to overflow.
- return false;
+ ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
+ LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT,
+ nullptr, UseInstrInfo);
+ ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
+ RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT,
+ nullptr, UseInstrInfo);
+ return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange));
}
static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
@@ -4114,30 +4085,35 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1)
return OverflowResult::NeverOverflows;
- KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
- KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
-
- if (checkRippleForSignedAdd(LHSKnown, RHSKnown))
- return OverflowResult::NeverOverflows;
+ ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
+ LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT);
+ ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
+ RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT);
+ OverflowResult OR =
+ mapOverflowResult(LHSRange.signedAddMayOverflow(RHSRange));
+ if (OR != OverflowResult::MayOverflow)
+ return OR;
// The remaining code needs Add to be available. Early returns if not so.
if (!Add)
return OverflowResult::MayOverflow;
// If the sign of Add is the same as at least one of the operands, this add
- // CANNOT overflow. This is particularly useful when the sum is
- // @llvm.assume'ed non-negative rather than proved so from analyzing its
- // operands.
+ // CANNOT overflow. If this can be determined from the known bits of the
+ // operands the above signedAddMayOverflow() check will have already done so.
+ // The only other way to improve on the known bits is from an assumption, so
+ // call computeKnownBitsFromAssume() directly.
bool LHSOrRHSKnownNonNegative =
- (LHSKnown.isNonNegative() || RHSKnown.isNonNegative());
+ (LHSRange.isAllNonNegative() || RHSRange.isAllNonNegative());
bool LHSOrRHSKnownNegative =
- (LHSKnown.isNegative() || RHSKnown.isNegative());
+ (LHSRange.isAllNegative() || RHSRange.isAllNegative());
if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
- KnownBits AddKnown = computeKnownBits(Add, DL, /*Depth=*/0, AC, CxtI, DT);
+ KnownBits AddKnown(LHSRange.getBitWidth());
+ computeKnownBitsFromAssume(
+ Add, AddKnown, /*Depth=*/0, Query(DL, AC, CxtI, DT, true));
if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) ||
- (AddKnown.isNegative() && LHSOrRHSKnownNegative)) {
+ (AddKnown.isNegative() && LHSOrRHSKnownNegative))
return OverflowResult::NeverOverflows;
- }
}
return OverflowResult::MayOverflow;
@@ -4149,20 +4125,11 @@ OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS,
AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
- KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
- if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) {
- KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
-
- // If the LHS is negative and the RHS is non-negative, no unsigned wrap.
- if (LHSKnown.isNegative() && RHSKnown.isNonNegative())
- return OverflowResult::NeverOverflows;
-
- // If the LHS is non-negative and the RHS negative, we always wrap.
- if (LHSKnown.isNonNegative() && RHSKnown.isNegative())
- return OverflowResult::AlwaysOverflows;
- }
-
- return OverflowResult::MayOverflow;
+ ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
+ LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT);
+ ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
+ RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT);
+ return mapOverflowResult(LHSRange.unsignedSubMayOverflow(RHSRange));
}
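
The range form also recovers the deleted sign-bit cases and distinguishes which direction the wrap goes. A small illustrative check (i8 bounds invented for the example):

#include "llvm/IR/ConstantRange.h"
using namespace llvm;

// Every value in [0, 10) is smaller than every value in [20, 30), so the
// unsigned subtraction always wraps below zero.
bool subAlwaysWrapsSketch() {
  ConstantRange L(APInt(8, 0), APInt(8, 10));
  ConstantRange R(APInt(8, 20), APInt(8, 30));
  return L.unsignedSubMayOverflow(R) ==
         ConstantRange::OverflowResult::AlwaysOverflowsLow;
}
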
OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS,
@@ -4177,37 +4144,19 @@ OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS,
ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1)
return OverflowResult::NeverOverflows;
- KnownBits LHSKnown = computeKnownBits(LHS, DL, 0, AC, CxtI, DT);
-
- KnownBits RHSKnown = computeKnownBits(RHS, DL, 0, AC, CxtI, DT);
-
- // Subtraction of two 2's complement numbers having identical signs will
- // never overflow.
- if ((LHSKnown.isNegative() && RHSKnown.isNegative()) ||
- (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()))
- return OverflowResult::NeverOverflows;
-
- // TODO: implement logic similar to checkRippleForAdd
- return OverflowResult::MayOverflow;
+ ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
+ LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT);
+ ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
+ RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT);
+ return mapOverflowResult(LHSRange.signedSubMayOverflow(RHSRange));
}
-bool llvm::isOverflowIntrinsicNoWrap(const IntrinsicInst *II,
+bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO,
const DominatorTree &DT) {
-#ifndef NDEBUG
- auto IID = II->getIntrinsicID();
- assert((IID == Intrinsic::sadd_with_overflow ||
- IID == Intrinsic::uadd_with_overflow ||
- IID == Intrinsic::ssub_with_overflow ||
- IID == Intrinsic::usub_with_overflow ||
- IID == Intrinsic::smul_with_overflow ||
- IID == Intrinsic::umul_with_overflow) &&
- "Not an overflow intrinsic!");
-#endif
-
SmallVector<const BranchInst *, 2> GuardingBranches;
SmallVector<const ExtractValueInst *, 2> Results;
- for (const User *U : II->users()) {
+ for (const User *U : WO->users()) {
if (const auto *EVI = dyn_cast<ExtractValueInst>(U)) {
assert(EVI->getNumIndices() == 1 && "Obvious from CI's type");
@@ -4307,6 +4256,11 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
if (!CS.doesNotThrow())
return false;
+ // A function which doesn't throw and has the "willreturn" attribute will
+ // always return.

+ if (CS.hasFnAttr(Attribute::WillReturn))
+ return true;
+
// Non-throwing call sites can loop infinitely, call exit/pthread_exit
// etc. and thus not return. However, LLVM already assumes that
//
@@ -4325,7 +4279,8 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
// is guaranteed to return.
return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() ||
match(I, m_Intrinsic<Intrinsic::assume>()) ||
- match(I, m_Intrinsic<Intrinsic::sideeffect>());
+ match(I, m_Intrinsic<Intrinsic::sideeffect>()) ||
+ match(I, m_Intrinsic<Intrinsic::experimental_widenable_condition>());
}
// Other instructions return normally.
@@ -4333,7 +4288,7 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
}
bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {
- // TODO: This is slightly consdervative for invoke instruction since exiting
+ // TODO: This is slightly conservative for invoke instructions since exiting
// via an exception *is* normal control for them.
for (auto I = BB->begin(), E = BB->end(); I != E; ++I)
if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
@@ -4357,6 +4312,8 @@ bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
}
bool llvm::propagatesFullPoison(const Instruction *I) {
+ // TODO: This should include all instructions apart from phis, selects and
+ // call-like instructions.
switch (I->getOpcode()) {
case Instruction::Add:
case Instruction::Sub:
@@ -4409,10 +4366,21 @@ const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) {
return I->getOperand(1);
default:
+ // Note: It's really tempting to think that a conditional branch or
+ // switch should be listed here, but that's incorrect. It's not the
+ // branch on poison which is UB; it is executing a side-effecting
+ // instruction which follows the branch.
return nullptr;
}
}
+bool llvm::mustTriggerUB(const Instruction *I,
+ const SmallSet<const Value *, 16>& KnownPoison) {
+ auto *NotPoison = getGuaranteedNonFullPoisonOp(I);
+ return (NotPoison && KnownPoison.count(NotPoison));
+}
+
bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) {
// We currently only look for uses of poison values within the same basic
// block, as that makes it easier to guarantee that the uses will be
@@ -4436,8 +4404,7 @@ bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) {
while (Iter++ < MaxDepth) {
for (auto &I : make_range(Begin, End)) {
if (&I != PoisonI) {
- const Value *NotPoison = getGuaranteedNonFullPoisonOp(&I);
- if (NotPoison != nullptr && YieldsPoison.count(NotPoison))
+ if (mustTriggerUB(&I, YieldsPoison))
return true;
if (!isGuaranteedToTransferExecutionToSuccessor(&I))
return false;
@@ -4926,6 +4893,10 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
return {SPF_ABS, SPNB_NA, false};
+ // (X >=s 0) ? X : -X or (X >=s 1) ? X : -X --> ABS(X)
+ if (Pred == ICmpInst::ICMP_SGE && match(CmpRHS, ZeroOrOne))
+ return {SPF_ABS, SPNB_NA, false};
+
// (X <s 0) ? X : -X or (X <s 1) ? X : -X --> NABS(X)
// (-X <s 0) ? -X : X or (-X <s 1) ? -X : X --> NABS(X)
if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
@@ -5084,11 +5055,19 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition());
if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false};
+ Value *TrueVal = SI->getTrueValue();
+ Value *FalseVal = SI->getFalseValue();
+
+ return llvm::matchDecomposedSelectPattern(CmpI, TrueVal, FalseVal, LHS, RHS,
+ CastOp, Depth);
+}
+
+SelectPatternResult llvm::matchDecomposedSelectPattern(
+ CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS,
+ Instruction::CastOps *CastOp, unsigned Depth) {
CmpInst::Predicate Pred = CmpI->getPredicate();
Value *CmpLHS = CmpI->getOperand(0);
Value *CmpRHS = CmpI->getOperand(1);
- Value *TrueVal = SI->getTrueValue();
- Value *FalseVal = SI->getFalseValue();
FastMathFlags FMF;
if (isa<FPMathOperator>(CmpI))
FMF = CmpI->getFastMathFlags();
@@ -5430,3 +5409,298 @@ Optional<bool> llvm::isImpliedByDomCondition(const Value *Cond,
bool CondIsTrue = TrueBB == ContextBB;
return isImpliedCondition(PredCond, Cond, DL, CondIsTrue);
}
+
+static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
+ APInt &Upper, const InstrInfoQuery &IIQ) {
+ unsigned Width = Lower.getBitWidth();
+ const APInt *C;
+ switch (BO.getOpcode()) {
+ case Instruction::Add:
+ if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
+ // FIXME: If we have both nuw and nsw, we should reduce the range further.
+ if (IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
+ // 'add nuw x, C' produces [C, UINT_MAX].
+ Lower = *C;
+ } else if (IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
+ if (C->isNegative()) {
+ // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
+ Lower = APInt::getSignedMinValue(Width);
+ Upper = APInt::getSignedMaxValue(Width) + *C + 1;
+ } else {
+ // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX].
+ Lower = APInt::getSignedMinValue(Width) + *C;
+ Upper = APInt::getSignedMaxValue(Width) + 1;
+ }
+ }
+ }
+ break;
+
+ case Instruction::And:
+ if (match(BO.getOperand(1), m_APInt(C)))
+ // 'and x, C' produces [0, C].
+ Upper = *C + 1;
+ break;
+
+ case Instruction::Or:
+ if (match(BO.getOperand(1), m_APInt(C)))
+ // 'or x, C' produces [C, UINT_MAX].
+ Lower = *C;
+ break;
+
+ case Instruction::AShr:
+ if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
+ // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C].
+ Lower = APInt::getSignedMinValue(Width).ashr(*C);
+ Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
+ } else if (match(BO.getOperand(0), m_APInt(C))) {
+ unsigned ShiftAmount = Width - 1;
+ if (!C->isNullValue() && IIQ.isExact(&BO))
+ ShiftAmount = C->countTrailingZeros();
+ if (C->isNegative()) {
+ // 'ashr C, x' produces [C, C >> (Width-1)]
+ Lower = *C;
+ Upper = C->ashr(ShiftAmount) + 1;
+ } else {
+ // 'ashr C, x' produces [C >> (Width-1), C]
+ Lower = C->ashr(ShiftAmount);
+ Upper = *C + 1;
+ }
+ }
+ break;
+
+ case Instruction::LShr:
+ if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
+ // 'lshr x, C' produces [0, UINT_MAX >> C].
+ Upper = APInt::getAllOnesValue(Width).lshr(*C) + 1;
+ } else if (match(BO.getOperand(0), m_APInt(C))) {
+ // 'lshr C, x' produces [C >> (Width-1), C].
+ unsigned ShiftAmount = Width - 1;
+ if (!C->isNullValue() && IIQ.isExact(&BO))
+ ShiftAmount = C->countTrailingZeros();
+ Lower = C->lshr(ShiftAmount);
+ Upper = *C + 1;
+ }
+ break;
+
+ case Instruction::Shl:
+ if (match(BO.getOperand(0), m_APInt(C))) {
+ if (IIQ.hasNoUnsignedWrap(&BO)) {
+ // 'shl nuw C, x' produces [C, C << CLZ(C)]
+ Lower = *C;
+ Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
+ } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw?
+ if (C->isNegative()) {
+ // 'shl nsw C, x' produces [C << CLO(C)-1, C]
+ unsigned ShiftAmount = C->countLeadingOnes() - 1;
+ Lower = C->shl(ShiftAmount);
+ Upper = *C + 1;
+ } else {
+ // 'shl nsw C, x' produces [C, C << CLZ(C)-1]
+ unsigned ShiftAmount = C->countLeadingZeros() - 1;
+ Lower = *C;
+ Upper = C->shl(ShiftAmount) + 1;
+ }
+ }
+ }
+ break;
+
+ case Instruction::SDiv:
+ if (match(BO.getOperand(1), m_APInt(C))) {
+ APInt IntMin = APInt::getSignedMinValue(Width);
+ APInt IntMax = APInt::getSignedMaxValue(Width);
+ if (C->isAllOnesValue()) {
+ // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX].
+ Lower = IntMin + 1;
+ Upper = IntMax + 1;
+ } else if (C->countLeadingZeros() < Width - 1) {
+ // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C]
+ // where C != -1 and C != 0 and C != 1
+ Lower = IntMin.sdiv(*C);
+ Upper = IntMax.sdiv(*C);
+ if (Lower.sgt(Upper))
+ std::swap(Lower, Upper);
+ Upper = Upper + 1;
+ assert(Upper != Lower && "Upper part of range has wrapped!");
+ }
+ } else if (match(BO.getOperand(0), m_APInt(C))) {
+ if (C->isMinSignedValue()) {
+ // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
+ Lower = *C;
+ Upper = Lower.lshr(1) + 1;
+ } else {
+ // 'sdiv C, x' produces [-|C|, |C|].
+ Upper = C->abs() + 1;
+ Lower = (-Upper) + 1;
+ }
+ }
+ break;
+
+ case Instruction::UDiv:
+ if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
+ // 'udiv x, C' produces [0, UINT_MAX / C].
+ Upper = APInt::getMaxValue(Width).udiv(*C) + 1;
+ } else if (match(BO.getOperand(0), m_APInt(C))) {
+ // 'udiv C, x' produces [0, C].
+ Upper = *C + 1;
+ }
+ break;
+
+ case Instruction::SRem:
+ if (match(BO.getOperand(1), m_APInt(C))) {
+ // 'srem x, C' produces (-|C|, |C|).
+ Upper = C->abs();
+ Lower = (-Upper) + 1;
+ }
+ break;
+
+ case Instruction::URem:
+ if (match(BO.getOperand(1), m_APInt(C)))
+ // 'urem x, C' produces [0, C).
+ Upper = *C;
+ break;
+
+ default:
+ break;
+ }
+}
+
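
As a worked instance of the bounds above (a standalone sketch, not part of the patch), take the 'srem x, C' case for i8 and C = 10:

#include "llvm/IR/ConstantRange.h"
using namespace llvm;

// 'srem x, 10' on i8 yields values in (-10, 10); as a half-open wrapped
// range that is [-9, 10), matching the Lower/Upper computed above.
ConstantRange sremRangeSketch() {
  APInt C(8, 10);
  APInt Upper = C.abs();      // 10
  APInt Lower = (-Upper) + 1; // -9
  return ConstantRange::getNonEmpty(Lower, Upper);
}
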
+static void setLimitsForIntrinsic(const IntrinsicInst &II, APInt &Lower,
+ APInt &Upper) {
+ unsigned Width = Lower.getBitWidth();
+ const APInt *C;
+ switch (II.getIntrinsicID()) {
+ case Intrinsic::uadd_sat:
+ // uadd.sat(x, C) produces [C, UINT_MAX].
+ if (match(II.getOperand(0), m_APInt(C)) ||
+ match(II.getOperand(1), m_APInt(C)))
+ Lower = *C;
+ break;
+ case Intrinsic::sadd_sat:
+ if (match(II.getOperand(0), m_APInt(C)) ||
+ match(II.getOperand(1), m_APInt(C))) {
+ if (C->isNegative()) {
+ // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)].
+ Lower = APInt::getSignedMinValue(Width);
+ Upper = APInt::getSignedMaxValue(Width) + *C + 1;
+ } else {
+ // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX].
+ Lower = APInt::getSignedMinValue(Width) + *C;
+ Upper = APInt::getSignedMaxValue(Width) + 1;
+ }
+ }
+ break;
+ case Intrinsic::usub_sat:
+ // usub.sat(C, x) produces [0, C].
+ if (match(II.getOperand(0), m_APInt(C)))
+ Upper = *C + 1;
+ // usub.sat(x, C) produces [0, UINT_MAX - C].
+ else if (match(II.getOperand(1), m_APInt(C)))
+ Upper = APInt::getMaxValue(Width) - *C + 1;
+ break;
+ case Intrinsic::ssub_sat:
+ if (match(II.getOperand(0), m_APInt(C))) {
+ if (C->isNegative()) {
+ // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)].
+ Lower = APInt::getSignedMinValue(Width);
+ Upper = *C - APInt::getSignedMinValue(Width) + 1;
+ } else {
+ // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX].
+ Lower = *C - APInt::getSignedMaxValue(Width);
+ Upper = APInt::getSignedMaxValue(Width) + 1;
+ }
+ } else if (match(II.getOperand(1), m_APInt(C))) {
+ if (C->isNegative()) {
+ // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX]:
+ Lower = APInt::getSignedMinValue(Width) - *C;
+ Upper = APInt::getSignedMaxValue(Width) + 1;
+ } else {
+ // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C].
+ Lower = APInt::getSignedMinValue(Width);
+ Upper = APInt::getSignedMaxValue(Width) - *C + 1;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+}
+
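
One subtlety worth spelling out: when only Lower is set, the untouched Upper of zero wraps around to mean "up to and including UINT_MAX". A sketch of the uadd.sat case (i8 and C = 200 invented for the example):

#include "llvm/IR/ConstantRange.h"
using namespace llvm;

// uadd.sat(x, 200) on i8 saturates at 255, so the result is in [200, 255].
// The wrapped half-open range [200, 0) encodes exactly that set.
ConstantRange uaddSatRangeSketch() {
  APInt Lower(8, 200);
  APInt Upper(8, 0);
  return ConstantRange::getNonEmpty(Lower, Upper);
}
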
+static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower,
+ APInt &Upper) {
+ const Value *LHS, *RHS;
+ SelectPatternResult R = matchSelectPattern(&SI, LHS, RHS);
+ if (R.Flavor == SPF_UNKNOWN)
+ return;
+
+ unsigned BitWidth = SI.getType()->getScalarSizeInBits();
+
+ if (R.Flavor == SelectPatternFlavor::SPF_ABS) {
+ // If the negation part of the abs (in RHS) has the NSW flag,
+ // then the result of abs(X) is [0..SIGNED_MAX],
+ // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
+ Lower = APInt::getNullValue(BitWidth);
+ if (cast<Instruction>(RHS)->hasNoSignedWrap())
+ Upper = APInt::getSignedMaxValue(BitWidth) + 1;
+ else
+ Upper = APInt::getSignedMinValue(BitWidth) + 1;
+ return;
+ }
+
+ if (R.Flavor == SelectPatternFlavor::SPF_NABS) {
+ // The result of -abs(X) is <= 0.
+ Lower = APInt::getSignedMinValue(BitWidth);
+ Upper = APInt(BitWidth, 1);
+ return;
+ }
+
+ const APInt *C;
+ if (!match(LHS, m_APInt(C)) && !match(RHS, m_APInt(C)))
+ return;
+
+ switch (R.Flavor) {
+ case SPF_UMIN:
+ Upper = *C + 1;
+ break;
+ case SPF_UMAX:
+ Lower = *C;
+ break;
+ case SPF_SMIN:
+ Lower = APInt::getSignedMinValue(BitWidth);
+ Upper = *C + 1;
+ break;
+ case SPF_SMAX:
+ Lower = *C;
+ Upper = APInt::getSignedMaxValue(BitWidth) + 1;
+ break;
+ default:
+ break;
+ }
+}
+
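
To make the nsw distinction above concrete, a minimal i8 sketch of the no-nsw case:

#include "llvm/IR/ConstantRange.h"
using namespace llvm;

// Without nsw on the negation, abs(INT_MIN) stays INT_MIN, so the range is
// [0, SIGNED_MIN] inclusive, i.e. the half-open set [0, 0x81) for i8.
ConstantRange absRangeWithoutNSWSketch() {
  APInt Lower = APInt::getNullValue(8);
  APInt Upper = APInt::getSignedMinValue(8) + 1;
  return ConstantRange::getNonEmpty(Lower, Upper);
}
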
+ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo) {
+ assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction");
+
+ const APInt *C;
+ if (match(V, m_APInt(C)))
+ return ConstantRange(*C);
+
+ InstrInfoQuery IIQ(UseInstrInfo);
+ unsigned BitWidth = V->getType()->getScalarSizeInBits();
+ APInt Lower = APInt(BitWidth, 0);
+ APInt Upper = APInt(BitWidth, 0);
+ if (auto *BO = dyn_cast<BinaryOperator>(V))
+ setLimitsForBinOp(*BO, Lower, Upper, IIQ);
+ else if (auto *II = dyn_cast<IntrinsicInst>(V))
+ setLimitsForIntrinsic(*II, Lower, Upper);
+ else if (auto *SI = dyn_cast<SelectInst>(V))
+ setLimitsForSelectPattern(*SI, Lower, Upper);
+
+ ConstantRange CR = ConstantRange::getNonEmpty(Lower, Upper);
+
+ if (auto *I = dyn_cast<Instruction>(V))
+ if (auto *Range = IIQ.getMetadata(I, LLVMContext::MD_range))
+ CR = CR.intersectWith(getConstantRangeFromMetadata(*Range));
+
+ return CR;
+}
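
Note on the encoding used by computeConstantRange: when no case fires, Lower == Upper == 0, and getNonEmpty deliberately maps that pair to the full set rather than the empty one. A one-line check (illustrative):

#include "llvm/IR/ConstantRange.h"
using namespace llvm;

bool defaultIsFullSetSketch() {
  return ConstantRange::getNonEmpty(APInt(8, 0), APInt(8, 0)).isFullSet();
}
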
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
index 5656a19d7e0d..986756eb2627 100644
--- a/lib/Analysis/VectorUtils.cpp
+++ b/lib/Analysis/VectorUtils.cpp
@@ -1,9 +1,8 @@
//===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -38,8 +37,9 @@ static cl::opt<unsigned> MaxInterleaveGroupFactor(
cl::init(8));
/// Return true if all of the intrinsic's arguments and return type are scalars
-/// for the scalar form of the intrinsic and vectors for the vector form of the
-/// intrinsic.
+/// for the scalar form of the intrinsic, and vectors for the vector form of the
+/// intrinsic (except operands that are marked as always being scalar by
+/// hasVectorInstrinsicScalarOpd).
bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
switch (ID) {
case Intrinsic::bswap: // Begin integer bit-manipulation.
@@ -49,6 +49,13 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::cttz:
case Intrinsic::fshl:
case Intrinsic::fshr:
+ case Intrinsic::sadd_sat:
+ case Intrinsic::ssub_sat:
+ case Intrinsic::uadd_sat:
+ case Intrinsic::usub_sat:
+ case Intrinsic::smul_fix:
+ case Intrinsic::smul_fix_sat:
+ case Intrinsic::umul_fix:
case Intrinsic::sqrt: // Begin floating-point.
case Intrinsic::sin:
case Intrinsic::cos:
@@ -74,18 +81,13 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::fmuladd:
case Intrinsic::powi:
case Intrinsic::canonicalize:
- case Intrinsic::sadd_sat:
- case Intrinsic::ssub_sat:
- case Intrinsic::uadd_sat:
- case Intrinsic::usub_sat:
return true;
default:
return false;
}
}
-/// Identifies if the intrinsic has a scalar operand. It check for
-/// ctlz,cttz and powi special intrinsics whose argument is scalar.
+/// Identifies if the vector form of the intrinsic has a scalar operand.
bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
unsigned ScalarOpdIdx) {
switch (ID) {
@@ -93,6 +95,10 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
case Intrinsic::cttz:
case Intrinsic::powi:
return (ScalarOpdIdx == 1);
+ case Intrinsic::smul_fix:
+ case Intrinsic::smul_fix_sat:
+ case Intrinsic::umul_fix:
+ return (ScalarOpdIdx == 2);
default:
return false;
}
@@ -300,30 +306,60 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
/// Get splat value if the input is a splat vector or return nullptr.
/// This function is not fully general. It checks only 2 cases:
-/// the input value is (1) a splat constants vector or (2) a sequence
-/// of instructions that broadcast a single value into a vector.
-///
+/// the input value is (1) a splat constant vector or (2) a sequence
+/// of instructions that broadcasts a scalar at element 0.
const llvm::Value *llvm::getSplatValue(const Value *V) {
-
- if (auto *C = dyn_cast<Constant>(V))
- if (isa<VectorType>(V->getType()))
+ if (isa<VectorType>(V->getType()))
+ if (auto *C = dyn_cast<Constant>(V))
return C->getSplatValue();
- auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V);
- if (!ShuffleInst)
- return nullptr;
- // All-zero (or undef) shuffle mask elements.
- for (int MaskElt : ShuffleInst->getShuffleMask())
- if (MaskElt != 0 && MaskElt != -1)
- return nullptr;
- // The first shuffle source is 'insertelement' with index 0.
- auto *InsertEltInst =
- dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0));
- if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) ||
- !cast<ConstantInt>(InsertEltInst->getOperand(2))->isZero())
- return nullptr;
+ // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
+ Value *Splat;
+ if (match(V, m_ShuffleVector(m_InsertElement(m_Value(), m_Value(Splat),
+ m_ZeroInt()),
+ m_Value(), m_ZeroInt())))
+ return Splat;
- return InsertEltInst->getOperand(1);
+ return nullptr;
+}
+
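
A hedged usage sketch of the pattern the matcher accepts (assumes an IRBuilder positioned inside a function; the helper name and vector width are illustrative):

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Builds 'shufflevector (insertelement undef, %x, 0), undef, zeroinitializer'
// and queries it; for a non-constant Scalar this returns Scalar itself.
const Value *buildAndQuerySplat(IRBuilder<> &B, Value *Scalar) {
  Type *VecTy = VectorType::get(Scalar->getType(), 4);
  Value *Ins =
      B.CreateInsertElement(UndefValue::get(VecTy), Scalar, B.getInt32(0));
  Constant *ZeroMask =
      ConstantAggregateZero::get(VectorType::get(B.getInt32Ty(), 4));
  Value *Splat = B.CreateShuffleVector(Ins, UndefValue::get(VecTy), ZeroMask);
  return getSplatValue(Splat);
}
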
+// This setting is based on its counterpart in value tracking, but it could be
+// adjusted if needed.
+const unsigned MaxDepth = 6;
+
+bool llvm::isSplatValue(const Value *V, unsigned Depth) {
+ assert(Depth <= MaxDepth && "Limit Search Depth");
+
+ if (isa<VectorType>(V->getType())) {
+ if (isa<UndefValue>(V))
+ return true;
+ // FIXME: Constant splat analysis does not allow undef elements.
+ if (auto *C = dyn_cast<Constant>(V))
+ return C->getSplatValue() != nullptr;
+ }
+
+ // FIXME: Constant splat analysis does not allow undef elements.
+ Constant *Mask;
+ if (match(V, m_ShuffleVector(m_Value(), m_Value(), m_Constant(Mask))))
+ return Mask->getSplatValue() != nullptr;
+
+ // The remaining tests are all recursive, so bail out if we hit the limit.
+ if (Depth++ == MaxDepth)
+ return false;
+
+ // If both operands of a binop are splats, the result is a splat.
+ Value *X, *Y, *Z;
+ if (match(V, m_BinOp(m_Value(X), m_Value(Y))))
+ return isSplatValue(X, Depth) && isSplatValue(Y, Depth);
+
+ // If all operands of a select are splats, the result is a splat.
+ if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))
+ return isSplatValue(X, Depth) && isSplatValue(Y, Depth) &&
+ isSplatValue(Z, Depth);
+
+ // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).
+
+ return false;
}
MapVector<Instruction *, uint64_t>
@@ -711,6 +747,52 @@ Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) {
return ResList[0];
}
+bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
+ auto *ConstMask = dyn_cast<Constant>(Mask);
+ if (!ConstMask)
+ return false;
+ if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
+ return true;
+ for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
+ ++I) {
+ if (auto *MaskElt = ConstMask->getAggregateElement(I))
+ if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
+ continue;
+ return false;
+ }
+ return true;
+}
+
+bool llvm::maskIsAllOneOrUndef(Value *Mask) {
+ auto *ConstMask = dyn_cast<Constant>(Mask);
+ if (!ConstMask)
+ return false;
+ if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
+ return true;
+ for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
+ ++I) {
+ if (auto *MaskElt = ConstMask->getAggregateElement(I))
+ if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
+ continue;
+ return false;
+ }
+ return true;
+}
+
+/// TODO: This is a lot like known bits, but for vectors. Is there something
+/// we can share with it?
+APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
+ const unsigned VWidth = cast<VectorType>(Mask->getType())->getNumElements();
+ APInt DemandedElts = APInt::getAllOnesValue(VWidth);
+ if (auto *CV = dyn_cast<ConstantVector>(Mask))
+ for (unsigned i = 0; i < VWidth; i++)
+ if (CV->getAggregateElement(i)->isNullValue())
+ DemandedElts.clearBit(i);
+ return DemandedElts;
+}
+
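
A small sketch of the lane accounting (assumes an LLVMContext is available; the mask is invented for the example):

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// For the <4 x i1> mask <1, 0, undef, 1> only lane 1 is provably dead, so
// the returned APInt is 0b1101; undef lanes stay conservatively demanded.
APInt demandedEltsSketch(LLVMContext &Ctx) {
  Type *I1 = Type::getInt1Ty(Ctx);
  Constant *Mask = ConstantVector::get(
      {ConstantInt::getTrue(Ctx), ConstantInt::getFalse(Ctx),
       UndefValue::get(I1), ConstantInt::getTrue(Ctx)});
  return possiblyDemandedEltsInMask(Mask);
}
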
bool InterleavedAccessInfo::isStrided(int Stride) {
unsigned Factor = std::abs(Stride);
return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
@@ -992,7 +1074,7 @@ void InterleavedAccessInfo::analyzeInterleaving(
// that all the pointers in the group don't wrap.
// So we check only group member 0 (which is always guaranteed to exist),
// and group member Factor - 1; If the latter doesn't exist we rely on
- // peeling (if it is a non-reveresed accsess -- see Case 3).
+ // peeling (if it is a non-reversed access -- see Case 3).
Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
/*ShouldCheckWrap=*/true)) {