summaryrefslogtreecommitdiff
path: root/lib/Transforms/Scalar
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-06-10 13:44:06 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-06-10 13:44:06 +0000
commit7ab83427af0f77b59941ceba41d509d7d097b065 (patch)
treecc41c05b1db454e3d802f34df75e636ee922ad87 /lib/Transforms/Scalar
parentd288ef4c1788d3a951a7558c68312c2d320612b1 (diff)
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--lib/Transforms/Scalar/AlignmentFromAssumptions.cpp4
-rw-r--r--lib/Transforms/Scalar/ConstantProp.cpp6
-rw-r--r--lib/Transforms/Scalar/DCE.cpp2
-rw-r--r--lib/Transforms/Scalar/FlattenCFGPass.cpp2
-rw-r--r--lib/Transforms/Scalar/GVNHoist.cpp2
-rw-r--r--lib/Transforms/Scalar/GVNSink.cpp6
-rw-r--r--lib/Transforms/Scalar/GuardWidening.cpp2
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp7
-rw-r--r--lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp35
-rw-r--r--lib/Transforms/Scalar/InferAddressSpaces.cpp9
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp10
-rw-r--r--lib/Transforms/Scalar/LoadCombine.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp77
-rw-r--r--lib/Transforms/Scalar/LoopPredication.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopRerollPass.cpp4
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp167
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp16
-rw-r--r--lib/Transforms/Scalar/LowerExpectIntrinsic.cpp4
-rw-r--r--lib/Transforms/Scalar/LowerGuardIntrinsic.cpp2
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp8
-rw-r--r--lib/Transforms/Scalar/NewGVN.cpp80
-rw-r--r--lib/Transforms/Scalar/Reg2Mem.cpp2
-rw-r--r--lib/Transforms/Scalar/RewriteStatepointsForGC.cpp18
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp4
-rw-r--r--lib/Transforms/Scalar/SROA.cpp19
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp6
-rw-r--r--lib/Transforms/Scalar/Scalarizer.cpp2
-rw-r--r--lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp10
-rw-r--r--lib/Transforms/Scalar/SimpleLoopUnswitch.cpp4
-rw-r--r--lib/Transforms/Scalar/Sink.cpp2
-rw-r--r--lib/Transforms/Scalar/StructurizeCFG.cpp2
-rw-r--r--lib/Transforms/Scalar/TailRecursionElimination.cpp4
32 files changed, 306 insertions, 214 deletions
diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index fd931c521c8f..99480f12da9e 100644
--- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -19,12 +19,11 @@
#define AA_NAME "alignment-from-assumptions"
#define DEBUG_TYPE AA_NAME
#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -35,6 +34,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
STATISTIC(NumLoadAlignChanged,
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 9e982194bac7..4fa27891a974 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -18,15 +18,15 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Pass.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <set>
using namespace llvm;
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index 07a0ba9b1222..fa4806e884c3 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -19,10 +19,10 @@
#include "llvm/Transforms/Scalar/DCE.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Pass.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp
index 185cdbdda378..063df779a30b 100644
--- a/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -11,10 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/CFG.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp
index b7514a6d5793..29de792bd248 100644
--- a/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/lib/Transforms/Scalar/GVNHoist.cpp
@@ -41,7 +41,6 @@
// ret void
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -50,6 +49,7 @@
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp
index 5c75f39e381d..8634816e702f 100644
--- a/lib/Transforms/Scalar/GVNSink.cpp
+++ b/lib/Transforms/Scalar/GVNSink.cpp
@@ -169,8 +169,8 @@ struct SinkingInstructionCandidate {
NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
- SplitEdgeCost;
}
- bool operator>=(const SinkingInstructionCandidate &Other) const {
- return Cost >= Other.Cost;
+ bool operator>(const SinkingInstructionCandidate &Other) const {
+ return Cost > Other.Cost;
}
};
@@ -745,7 +745,7 @@ unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
std::stable_sort(
Candidates.begin(), Candidates.end(),
[](const SinkingInstructionCandidate &A,
- const SinkingInstructionCandidate &B) { return A >= B; });
+ const SinkingInstructionCandidate &B) { return A > B; });
DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
: Candidates) dbgs()
<< " " << C << "\n";);
diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp
index 65a2cd955672..fb7c6e15758d 100644
--- a/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/lib/Transforms/Scalar/GuardWidening.cpp
@@ -40,7 +40,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/GuardWidening.h"
-#include "llvm/Pass.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -50,6 +49,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Scalar.h"
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 9a7882211bac..10782963177c 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -86,6 +86,10 @@ static cl::opt<bool> UsePostIncrementRanges(
cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
cl::init(true));
+static cl::opt<bool>
+DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
+ cl::desc("Disable Linear Function Test Replace optimization"));
+
namespace {
struct RewritePhi;
@@ -2413,7 +2417,8 @@ bool IndVarSimplify::run(Loop *L) {
// If we have a trip count expression, rewrite the loop's exit condition
// using it. We can currently only handle loops with a single exit.
- if (canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L, DT)) {
+ if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) &&
+ needsLFTR(L, DT)) {
PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
if (IndVar) {
// Check preconditions for proper SCEVExpander operation. SCEV does not
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index e21b0feb7c5a..2f96c3064b86 100644
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -59,8 +59,8 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
@@ -1371,28 +1371,35 @@ bool LoopConstrainer::run() {
DT.recalculate(F);
+ // We need to first add all the pre and post loop blocks into the loop
+ // structures (as part of createClonedLoopStructure), and then update the
+ // LCSSA form and LoopSimplifyForm. This is necessary for correctly updating
+ // LI when LoopSimplifyForm is generated.
+ Loop *PreL = nullptr, *PostL = nullptr;
if (!PreLoop.Blocks.empty()) {
- auto *L = createClonedLoopStructure(
+ PreL = createClonedLoopStructure(
&OriginalLoop, OriginalLoop.getParentLoop(), PreLoop.Map);
- formLCSSARecursively(*L, DT, &LI, &SE);
- simplifyLoop(L, &DT, &LI, &SE, nullptr, true);
- // Pre loops are slow paths, we do not need to perform any loop
- // optimizations on them.
- DisableAllLoopOptsOnLoop(*L);
}
if (!PostLoop.Blocks.empty()) {
- auto *L = createClonedLoopStructure(
+ PostL = createClonedLoopStructure(
&OriginalLoop, OriginalLoop.getParentLoop(), PostLoop.Map);
+ }
+
+ // This function canonicalizes the loop into Loop-Simplify and LCSSA forms.
+ auto CanonicalizeLoop = [&] (Loop *L, bool IsOriginalLoop) {
formLCSSARecursively(*L, DT, &LI, &SE);
simplifyLoop(L, &DT, &LI, &SE, nullptr, true);
- // Post loops are slow paths, we do not need to perform any loop
+ // Pre/post loops are slow paths, we do not need to perform any loop
// optimizations on them.
- DisableAllLoopOptsOnLoop(*L);
- }
-
- formLCSSARecursively(OriginalLoop, DT, &LI, &SE);
- simplifyLoop(&OriginalLoop, &DT, &LI, &SE, nullptr, true);
+ if (!IsOriginalLoop)
+ DisableAllLoopOptsOnLoop(*L);
+ };
+ if (PreL)
+ CanonicalizeLoop(PreL, false);
+ if (PostL)
+ CanonicalizeLoop(PostL, false);
+ CanonicalizeLoop(&OriginalLoop, true);
return true;
}
diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 5e116ef2fe75..3c8fbd35bf8c 100644
--- a/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -89,7 +89,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
@@ -100,6 +99,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
@@ -500,6 +500,7 @@ static Value *cloneConstantExprWithNewAddressSpace(
}
// Computes the operands of the new constant expression.
+ bool IsNew = false;
SmallVector<Constant *, 4> NewOperands;
for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) {
Constant *Operand = CE->getOperand(Index);
@@ -509,6 +510,7 @@ static Value *cloneConstantExprWithNewAddressSpace(
// bitcast, and getelementptr) do not incur cycles in the data flow graph
// and (2) this function is called on constant expressions in postorder.
if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand)) {
+ IsNew = true;
NewOperands.push_back(cast<Constant>(NewOperand));
} else {
// Otherwise, reuses the old operand.
@@ -516,6 +518,11 @@ static Value *cloneConstantExprWithNewAddressSpace(
}
}
+ // If !IsNew, we will replace the Value with itself. However, replaced values
+ // are assumed to wrapped in a addrspace cast later so drop it now.
+ if (!IsNew)
+ return nullptr;
+
if (CE->getOpcode() == Instruction::GetElementPtr) {
// Needs to specify the source type while constructing a getelementptr
// constant expression.
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 2ef8f8563bb9..c120036464d0 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -12,16 +12,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/JumpThreading.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -36,6 +35,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -132,7 +132,7 @@ bool JumpThreading::runOnFunction(Function &F) {
bool HasProfileData = F.getEntryCount().hasValue();
if (HasProfileData) {
LoopInfo LI{DominatorTree(F)};
- BPI.reset(new BranchProbabilityInfo(F, LI));
+ BPI.reset(new BranchProbabilityInfo(F, LI, TLI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
@@ -152,7 +152,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
bool HasProfileData = F.getEntryCount().hasValue();
if (HasProfileData) {
LoopInfo LI{DominatorTree(F)};
- BPI.reset(new BranchProbabilityInfo(F, LI));
+ BPI.reset(new BranchProbabilityInfo(F, LI, &TLI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp
index 494cbc61bc9c..025ba1bfedc1 100644
--- a/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/lib/Transforms/Scalar/LoadCombine.cpp
@@ -11,7 +11,6 @@
///
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -28,6 +27,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index c6a05ecbd0b1..b706152f30c8 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -116,6 +116,7 @@ private:
Memset,
MemsetPattern,
Memcpy,
+ UnorderedAtomicMemcpy,
DontUse // Dummy retval never to be used. Allows catching errors in retval
// handling.
};
@@ -353,8 +354,12 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
LoopIdiomRecognize::LegalStoreKind
LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
+
// Don't touch volatile stores.
- if (!SI->isSimple())
+ if (SI->isVolatile())
+ return LegalStoreKind::None;
+ // We only want simple or unordered-atomic stores.
+ if (!SI->isUnordered())
return LegalStoreKind::None;
// Don't convert stores of non-integral pointer types to memsets (which stores
@@ -395,15 +400,18 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
Value *SplatValue = isBytewiseValue(StoredVal);
Constant *PatternValue = nullptr;
+ // Note: memset and memset_pattern on unordered-atomic is yet not supported
+ bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple();
+
// If we're allowed to form a memset, and the stored value would be
// acceptable for memset, use it.
- if (HasMemset && SplatValue &&
+ if (!UnorderedAtomic && HasMemset && SplatValue &&
// Verify that the stored value is loop invariant. If not, we can't
// promote the memset.
CurLoop->isLoopInvariant(SplatValue)) {
// It looks like we can use SplatValue.
return LegalStoreKind::Memset;
- } else if (HasMemsetPattern &&
+ } else if (!UnorderedAtomic && HasMemsetPattern &&
// Don't create memset_pattern16s with address spaces.
StorePtr->getType()->getPointerAddressSpace() == 0 &&
(PatternValue = getMemSetPatternValue(StoredVal, DL))) {
@@ -422,7 +430,12 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
// The store must be feeding a non-volatile load.
LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
- if (!LI || !LI->isSimple())
+
+ // Only allow non-volatile loads
+ if (!LI || LI->isVolatile())
+ return LegalStoreKind::None;
+ // Only allow simple or unordered-atomic loads
+ if (!LI->isUnordered())
return LegalStoreKind::None;
// See if the pointer expression is an AddRec like {base,+,1} on the current
@@ -438,7 +451,9 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
return LegalStoreKind::None;
// Success. This store can be converted into a memcpy.
- return LegalStoreKind::Memcpy;
+ UnorderedAtomic = UnorderedAtomic || LI->isAtomic();
+ return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
+ : LegalStoreKind::Memcpy;
}
// This store can't be transformed into a memset/memcpy.
return LegalStoreKind::None;
@@ -469,6 +484,7 @@ void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
StoreRefsForMemsetPattern[Ptr].push_back(SI);
} break;
case LegalStoreKind::Memcpy:
+ case LegalStoreKind::UnorderedAtomicMemcpy:
StoreRefsForMemcpy.push_back(SI);
break;
default:
@@ -882,7 +898,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
/// for (i) A[i] = B[i];
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
const SCEV *BECount) {
- assert(SI->isSimple() && "Expected only non-volatile stores.");
+ assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.");
Value *StorePtr = SI->getPointerOperand();
const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
@@ -892,7 +908,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
// The store must be feeding a non-volatile load.
LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
- assert(LI->isSimple() && "Expected only non-volatile stores.");
+ assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.");
// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided load. If we have something else, it's a
@@ -966,16 +982,47 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
const SCEV *NumBytesS =
SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
- if (StoreSize != 1)
- NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
- SCEV::FlagNUW);
- Value *NumBytes =
- Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
+ unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
+ CallInst *NewCall = nullptr;
+ // Check whether to generate an unordered atomic memcpy:
+ // If the load or store are atomic, then they must neccessarily be unordered
+ // by previous checks.
+ if (!SI->isAtomic() && !LI->isAtomic()) {
+ if (StoreSize != 1)
+ NumBytesS = SE->getMulExpr(
+ NumBytesS, SE->getConstant(IntPtrTy, StoreSize), SCEV::FlagNUW);
- CallInst *NewCall =
- Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
- std::min(SI->getAlignment(), LI->getAlignment()));
+ Value *NumBytes =
+ Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
+
+ NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align);
+ } else {
+ // We cannot allow unaligned ops for unordered load/store, so reject
+ // anything where the alignment isn't at least the element size.
+ if (Align < StoreSize)
+ return false;
+
+ // If the element.atomic memcpy is not lowered into explicit
+ // loads/stores later, then it will be lowered into an element-size
+ // specific lib call. If the lib call doesn't exist for our store size, then
+ // we shouldn't generate the memcpy.
+ if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
+ return false;
+
+ Value *NumElements =
+ Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
+
+ NewCall = Builder.CreateElementAtomicMemCpy(StoreBasePtr, LoadBasePtr,
+ NumElements, StoreSize);
+ // Propagate alignment info onto the pointer args. Note that unordered
+ // atomic loads/stores are *required* by the spec to have an alignment
+ // but non-atomic loads/stores may not.
+ NewCall->addParamAttr(0, Attribute::getWithAlignment(NewCall->getContext(),
+ SI->getAlignment()));
+ NewCall->addParamAttr(1, Attribute::getWithAlignment(NewCall->getContext(),
+ LI->getAlignment()));
+ }
NewCall->setDebugLoc(SI->getDebugLoc());
DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp
index 32fd3da465fe..9b12ba180444 100644
--- a/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/lib/Transforms/Scalar/LoopPredication.cpp
@@ -37,7 +37,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopPredication.h"
-#include "llvm/Pass.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -48,6 +47,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index fd15a9014def..fc0216e76a5b 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -11,10 +11,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -31,6 +30,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 28d94497a3ef..b027278b24f2 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -131,7 +131,7 @@ static cl::opt<bool> EnablePhiElim(
// The flag adds instruction count to solutions cost comparision.
static cl::opt<bool> InsnsCost(
- "lsr-insns-cost", cl::Hidden, cl::init(false),
+ "lsr-insns-cost", cl::Hidden, cl::init(true),
cl::desc("Add instruction count to a LSR cost model"));
// Flag to choose how to narrow complex lsr solution
@@ -950,39 +950,37 @@ namespace {
/// This class is used to measure and compare candidate formulae.
class Cost {
- /// TODO: Some of these could be merged. Also, a lexical ordering
- /// isn't always optimal.
- unsigned Insns;
- unsigned NumRegs;
- unsigned AddRecCost;
- unsigned NumIVMuls;
- unsigned NumBaseAdds;
- unsigned ImmCost;
- unsigned SetupCost;
- unsigned ScaleCost;
+ TargetTransformInfo::LSRCost C;
public:
- Cost()
- : Insns(0), NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0),
- ImmCost(0), SetupCost(0), ScaleCost(0) {}
+ Cost() {
+ C.Insns = 0;
+ C.NumRegs = 0;
+ C.AddRecCost = 0;
+ C.NumIVMuls = 0;
+ C.NumBaseAdds = 0;
+ C.ImmCost = 0;
+ C.SetupCost = 0;
+ C.ScaleCost = 0;
+ }
- bool operator<(const Cost &Other) const;
+ bool isLess(Cost &Other, const TargetTransformInfo &TTI);
void Lose();
#ifndef NDEBUG
// Once any of the metrics loses, they must all remain losers.
bool isValid() {
- return ((Insns | NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
- | ImmCost | SetupCost | ScaleCost) != ~0u)
- || ((Insns & NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
- & ImmCost & SetupCost & ScaleCost) == ~0u);
+ return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
+ | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
+ || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
+ & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);
}
#endif
bool isLoser() {
assert(isValid() && "invalid cost");
- return NumRegs == ~0u;
+ return C.NumRegs == ~0u;
}
void RateFormula(const TargetTransformInfo &TTI,
@@ -1170,10 +1168,10 @@ void Cost::RateRegister(const SCEV *Reg,
}
// Otherwise, it will be an invariant with respect to Loop L.
- ++NumRegs;
+ ++C.NumRegs;
return;
}
- AddRecCost += 1; /// TODO: This should be a function of the stride.
+ C.AddRecCost += 1; /// TODO: This should be a function of the stride.
// Add the step value register, if it needs one.
// TODO: The non-affine case isn't precisely modeled here.
@@ -1185,7 +1183,7 @@ void Cost::RateRegister(const SCEV *Reg,
}
}
}
- ++NumRegs;
+ ++C.NumRegs;
// Rough heuristic; favor registers which don't require extra setup
// instructions in the preheader.
@@ -1194,9 +1192,9 @@ void Cost::RateRegister(const SCEV *Reg,
!(isa<SCEVAddRecExpr>(Reg) &&
(isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
- ++SetupCost;
+ ++C.SetupCost;
- NumIVMuls += isa<SCEVMulExpr>(Reg) &&
+ C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
SE.hasComputableLoopEvolution(Reg, L);
}
@@ -1229,9 +1227,9 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
SmallPtrSetImpl<const SCEV *> *LoserRegs) {
assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
// Tally up the registers.
- unsigned PrevAddRecCost = AddRecCost;
- unsigned PrevNumRegs = NumRegs;
- unsigned PrevNumBaseAdds = NumBaseAdds;
+ unsigned PrevAddRecCost = C.AddRecCost;
+ unsigned PrevNumRegs = C.NumRegs;
+ unsigned PrevNumBaseAdds = C.NumBaseAdds;
if (const SCEV *ScaledReg = F.ScaledReg) {
if (VisitedRegs.count(ScaledReg)) {
Lose();
@@ -1251,45 +1249,51 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
return;
}
- // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
- // additional instruction (at least fill).
- unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1;
- if (NumRegs > TTIRegNum) {
- // Cost already exceeded TTIRegNum, then only newly added register can add
- // new instructions.
- if (PrevNumRegs > TTIRegNum)
- Insns += (NumRegs - PrevNumRegs);
- else
- Insns += (NumRegs - TTIRegNum);
- }
-
// Determine how many (unfolded) adds we'll need inside the loop.
size_t NumBaseParts = F.getNumRegs();
if (NumBaseParts > 1)
// Do not count the base and a possible second register if the target
// allows to fold 2 registers.
- NumBaseAdds +=
+ C.NumBaseAdds +=
NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
- NumBaseAdds += (F.UnfoldedOffset != 0);
+ C.NumBaseAdds += (F.UnfoldedOffset != 0);
// Accumulate non-free scaling amounts.
- ScaleCost += getScalingFactorCost(TTI, LU, F, *L);
+ C.ScaleCost += getScalingFactorCost(TTI, LU, F, *L);
// Tally up the non-zero immediates.
for (const LSRFixup &Fixup : LU.Fixups) {
int64_t O = Fixup.Offset;
int64_t Offset = (uint64_t)O + F.BaseOffset;
if (F.BaseGV)
- ImmCost += 64; // Handle symbolic values conservatively.
+ C.ImmCost += 64; // Handle symbolic values conservatively.
// TODO: This should probably be the pointer size.
else if (Offset != 0)
- ImmCost += APInt(64, Offset, true).getMinSignedBits();
+ C.ImmCost += APInt(64, Offset, true).getMinSignedBits();
// Check with target if this offset with this instruction is
// specifically not supported.
if ((isa<LoadInst>(Fixup.UserInst) || isa<StoreInst>(Fixup.UserInst)) &&
!TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset))
- NumBaseAdds++;
+ C.NumBaseAdds++;
+ }
+
+ // If we don't count instruction cost exit here.
+ if (!InsnsCost) {
+ assert(isValid() && "invalid cost");
+ return;
+ }
+
+ // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
+ // additional instruction (at least fill).
+ unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1;
+ if (C.NumRegs > TTIRegNum) {
+ // Cost already exceeded TTIRegNum, then only newly added register can add
+ // new instructions.
+ if (PrevNumRegs > TTIRegNum)
+ C.Insns += (C.NumRegs - PrevNumRegs);
+ else
+ C.Insns += (C.NumRegs - TTIRegNum);
}
// If ICmpZero formula ends with not 0, it could not be replaced by
@@ -1302,55 +1306,54 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
// For {-10, +, 1}:
// i = i + 1;
if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd())
- Insns++;
+ C.Insns++;
// Each new AddRec adds 1 instruction to calculation.
- Insns += (AddRecCost - PrevAddRecCost);
+ C.Insns += (C.AddRecCost - PrevAddRecCost);
// BaseAdds adds instructions for unfolded registers.
if (LU.Kind != LSRUse::ICmpZero)
- Insns += NumBaseAdds - PrevNumBaseAdds;
+ C.Insns += C.NumBaseAdds - PrevNumBaseAdds;
assert(isValid() && "invalid cost");
}
/// Set this cost to a losing value.
void Cost::Lose() {
- Insns = ~0u;
- NumRegs = ~0u;
- AddRecCost = ~0u;
- NumIVMuls = ~0u;
- NumBaseAdds = ~0u;
- ImmCost = ~0u;
- SetupCost = ~0u;
- ScaleCost = ~0u;
+ C.Insns = ~0u;
+ C.NumRegs = ~0u;
+ C.AddRecCost = ~0u;
+ C.NumIVMuls = ~0u;
+ C.NumBaseAdds = ~0u;
+ C.ImmCost = ~0u;
+ C.SetupCost = ~0u;
+ C.ScaleCost = ~0u;
}
/// Choose the lower cost.
-bool Cost::operator<(const Cost &Other) const {
- if (InsnsCost && Insns != Other.Insns)
- return Insns < Other.Insns;
- return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost,
- ImmCost, SetupCost) <
- std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls,
- Other.NumBaseAdds, Other.ScaleCost, Other.ImmCost,
- Other.SetupCost);
+bool Cost::isLess(Cost &Other, const TargetTransformInfo &TTI) {
+ if (InsnsCost.getNumOccurrences() > 0 && InsnsCost &&
+ C.Insns != Other.C.Insns)
+ return C.Insns < Other.C.Insns;
+ return TTI.isLSRCostLess(C, Other.C);
}
void Cost::print(raw_ostream &OS) const {
- OS << Insns << " instruction" << (Insns == 1 ? " " : "s ");
- OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
- if (AddRecCost != 0)
- OS << ", with addrec cost " << AddRecCost;
- if (NumIVMuls != 0)
- OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
- if (NumBaseAdds != 0)
- OS << ", plus " << NumBaseAdds << " base add"
- << (NumBaseAdds == 1 ? "" : "s");
- if (ScaleCost != 0)
- OS << ", plus " << ScaleCost << " scale cost";
- if (ImmCost != 0)
- OS << ", plus " << ImmCost << " imm cost";
- if (SetupCost != 0)
- OS << ", plus " << SetupCost << " setup cost";
+ if (InsnsCost)
+ OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
+ OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
+ if (C.AddRecCost != 0)
+ OS << ", with addrec cost " << C.AddRecCost;
+ if (C.NumIVMuls != 0)
+ OS << ", plus " << C.NumIVMuls << " IV mul"
+ << (C.NumIVMuls == 1 ? "" : "s");
+ if (C.NumBaseAdds != 0)
+ OS << ", plus " << C.NumBaseAdds << " base add"
+ << (C.NumBaseAdds == 1 ? "" : "s");
+ if (C.ScaleCost != 0)
+ OS << ", plus " << C.ScaleCost << " scale cost";
+ if (C.ImmCost != 0)
+ OS << ", plus " << C.ImmCost << " imm cost";
+ if (C.SetupCost != 0)
+ OS << ", plus " << C.SetupCost << " setup cost";
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -4105,7 +4108,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
Cost CostBest;
Regs.clear();
CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU);
- if (CostF < CostBest)
+ if (CostF.isLess(CostBest, TTI))
std::swap(F, Best);
DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
dbgs() << "\n"
@@ -4573,7 +4576,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
NewCost = CurCost;
NewRegs = CurRegs;
NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, SE, DT, LU);
- if (NewCost < SolutionCost) {
+ if (NewCost.isLess(SolutionCost, TTI)) {
Workspace.push_back(&F);
if (Workspace.size() != Uses.size()) {
SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 19daebd0613a..d0c96fa627a4 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -26,34 +26,34 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Support/BranchProbability.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 7d8da9b453f9..46f8a3564265 100644
--- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -93,7 +93,9 @@ static bool handleSwitchExpect(SwitchInst &SI) {
/// the branch probability info for the originating branch can be inferred.
static void handlePhiDef(CallInst *Expect) {
Value &Arg = *Expect->getArgOperand(0);
- ConstantInt *ExpectedValue = cast<ConstantInt>(Expect->getArgOperand(1));
+ ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(Expect->getArgOperand(1));
+ if (!ExpectedValue)
+ return;
const APInt &ExpectedPhiValue = ExpectedValue->getValue();
// Walk up in backward a list of instructions that
diff --git a/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp b/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
index 4f413715ffe6..070114a84cc5 100644
--- a/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
+++ b/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
@@ -17,10 +17,10 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 21a632073da7..7896396f0898 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -12,11 +12,12 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
@@ -31,12 +32,12 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
@@ -49,7 +50,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 27809f5b6f66..6926aae37963 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -378,6 +378,15 @@ private:
};
namespace llvm {
+struct ExactEqualsExpression {
+ const Expression &E;
+ explicit ExactEqualsExpression(const Expression &E) : E(E) {}
+ hash_code getComputedHash() const { return E.getComputedHash(); }
+ bool operator==(const Expression &Other) const {
+ return E.exactlyEquals(Other);
+ }
+};
+
template <> struct DenseMapInfo<const Expression *> {
static const Expression *getEmptyKey() {
auto Val = static_cast<uintptr_t>(-1);
@@ -390,8 +399,17 @@ template <> struct DenseMapInfo<const Expression *> {
return reinterpret_cast<const Expression *>(Val);
}
static unsigned getHashValue(const Expression *E) {
- return static_cast<unsigned>(E->getComputedHash());
+ return E->getComputedHash();
+ }
+ static unsigned getHashValue(const ExactEqualsExpression &E) {
+ return E.getComputedHash();
+ }
+ static bool isEqual(const ExactEqualsExpression &LHS, const Expression *RHS) {
+ if (RHS == getTombstoneKey() || RHS == getEmptyKey())
+ return false;
+ return LHS == *RHS;
}
+
static bool isEqual(const Expression *LHS, const Expression *RHS) {
if (LHS == RHS)
return true;
@@ -848,6 +866,8 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
// Things in TOPClass are equivalent to everything.
if (ValueToClass.lookup(*U) == TOPClass)
return false;
+ if (lookupOperandLeader(*U) == PN)
+ return false;
return true;
});
std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
@@ -1571,30 +1591,6 @@ bool NewGVN::isCycleFree(const Instruction *I) const {
// Evaluate PHI nodes symbolically, and create an expression result.
const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
- // Resolve irreducible and reducible phi cycles.
- // FIXME: This is hopefully a temporary solution while we resolve the issues
- // with fixpointing self-cycles. It currently should be "guaranteed" to be
- // correct, but non-optimal. The SCCFinder does not, for example, take
- // reachability of arguments into account, etc.
- SCCFinder.Start(I);
- bool CanOptimize = true;
- SmallPtrSet<Value *, 8> OuterOps;
-
- auto &Component = SCCFinder.getComponentFor(I);
- for (auto *Member : Component) {
- if (!isa<PHINode>(Member)) {
- CanOptimize = false;
- break;
- }
- for (auto &PHIOp : cast<PHINode>(Member)->operands())
- if (!isa<PHINode>(PHIOp) || !Component.count(cast<PHINode>(PHIOp)))
- OuterOps.insert(PHIOp);
- }
- if (CanOptimize && OuterOps.size() == 1) {
- DEBUG(dbgs() << "Resolving cyclic phi to value " << *(*OuterOps.begin())
- << "\n");
- return createVariableOrConstant(*OuterOps.begin());
- }
// True if one of the incoming phi edges is a backedge.
bool HasBackedge = false;
// All constant tracks the state of whether all the *original* phi operands
@@ -1662,7 +1658,12 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
if (!someEquivalentDominates(AllSameInst, I))
return E;
}
-
+ // Can't simplify to something that comes later in the iteration.
+ // Otherwise, when and if it changes congruence class, we will never catch
+ // up. We will always be a class behind it.
+ if (isa<Instruction>(AllSameValue) &&
+ InstrToDFSNum(AllSameValue) > InstrToDFSNum(I))
+ return E;
NumGVNPhisAllSame++;
DEBUG(dbgs() << "Simplified PHI node " << *I << " to " << *AllSameValue
<< "\n");
@@ -2158,7 +2159,17 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
if (OldClass->getDefiningExpr()) {
DEBUG(dbgs() << "Erasing expression " << *OldClass->getDefiningExpr()
<< " from table\n");
- ExpressionToClass.erase(OldClass->getDefiningExpr());
+ // We erase it as an exact expression to make sure we don't just erase an
+ // equivalent one.
+ auto Iter = ExpressionToClass.find_as(
+ ExactEqualsExpression(*OldClass->getDefiningExpr()));
+ if (Iter != ExpressionToClass.end())
+ ExpressionToClass.erase(Iter);
+#ifdef EXPENSIVE_CHECKS
+ assert(
+ (*OldClass->getDefiningExpr() != *E || ExpressionToClass.lookup(E)) &&
+ "We erased the expression we just inserted, which should not happen");
+#endif
}
} else if (OldClass->getLeader() == I) {
// When the leader changes, the value numbering of
@@ -2272,8 +2283,13 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
auto *OldE = ValueToExpression.lookup(I);
// It could just be that the old class died. We don't want to erase it if we
// just moved classes.
- if (OldE && isa<StoreExpression>(OldE) && *E != *OldE)
- ExpressionToClass.erase(OldE);
+ if (OldE && isa<StoreExpression>(OldE) && *E != *OldE) {
+ // Erase this as an exact expression to ensure we don't erase expressions
+ // equivalent to it.
+ auto Iter = ExpressionToClass.find_as(ExactEqualsExpression(*OldE));
+ if (Iter != ExpressionToClass.end())
+ ExpressionToClass.erase(Iter);
+ }
}
ValueToExpression[I] = E;
}
@@ -3060,6 +3076,9 @@ void NewGVN::iterateTouchedInstructions() {
}
updateProcessedCount(CurrBlock);
}
+ // Reset after processing (because we may mark ourselves as touched when
+ // we propagate equalities).
+ TouchedInstructions.reset(InstrNum);
if (auto *MP = dyn_cast<MemoryPhi>(V)) {
DEBUG(dbgs() << "Processing MemoryPhi " << *MP << "\n");
@@ -3070,9 +3089,6 @@ void NewGVN::iterateTouchedInstructions() {
llvm_unreachable("Should have been a MemoryPhi or Instruction");
}
updateProcessedCount(V);
- // Reset after processing (because we may mark ourselves as touched when
- // we propagate equalities).
- TouchedInstructions.reset(InstrNum);
}
}
NumGVNMaxIterations = std::max(NumGVNMaxIterations.getValue(), Iterations);
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 615029dd161b..96295683314c 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -25,6 +24,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <list>
using namespace llvm;
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 350b50ffcdd4..bae7911d222c 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -12,15 +12,14 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Pass.h"
-#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/MapVector.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
@@ -28,15 +27,16 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 1d0e8396f6a2..815492ac354c 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1117,7 +1117,7 @@ CallOverdefined:
// Otherwise, if we have a single return value case, and if the function is
// a declaration, maybe we can constant fold it.
if (F && F->isDeclaration() && !I->getType()->isStructTy() &&
- canConstantFoldCallTo(F)) {
+ canConstantFoldCallTo(CS, F)) {
SmallVector<Constant*, 8> Operands;
for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end();
@@ -1137,7 +1137,7 @@ CallOverdefined:
// If we can constant fold this, mark the result of the call as a
// constant.
- if (Constant *C = ConstantFoldCall(F, Operands, TLI)) {
+ if (Constant *C = ConstantFoldCall(CS, F, Operands, TLI)) {
// call -> undef.
if (isa<UndefValue>(C))
return;
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index fb1b5813fd79..1527f15f18a3 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -3626,10 +3626,12 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
auto *PartPtrTy =
PLoad->getType()->getPointerTo(SI->getPointerAddressSpace());
+ auto AS = SI->getPointerAddressSpace();
StoreInst *PStore = IRB.CreateAlignedStore(
- PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
- APInt(DL.getPointerSizeInBits(), PartOffset),
- PartPtrTy, StoreBasePtr->getName() + "."),
+ PLoad,
+ getAdjustedPtr(IRB, DL, StoreBasePtr,
+ APInt(DL.getPointerSizeInBits(AS), PartOffset),
+ PartPtrTy, StoreBasePtr->getName() + "."),
getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
PStore->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
@@ -3707,9 +3709,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
PLoad = (*SplitLoads)[Idx];
} else {
IRB.SetInsertPoint(LI);
+ auto AS = LI->getPointerAddressSpace();
PLoad = IRB.CreateAlignedLoad(
getAdjustedPtr(IRB, DL, LoadBasePtr,
- APInt(DL.getPointerSizeInBits(), PartOffset),
+ APInt(DL.getPointerSizeInBits(AS), PartOffset),
LoadPartPtrTy, LoadBasePtr->getName() + "."),
getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
@@ -3717,10 +3720,12 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
// And store this partition.
IRB.SetInsertPoint(SI);
+ auto AS = SI->getPointerAddressSpace();
StoreInst *PStore = IRB.CreateAlignedStore(
- PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
- APInt(DL.getPointerSizeInBits(), PartOffset),
- StorePartPtrTy, StoreBasePtr->getName() + "."),
+ PLoad,
+ getAdjustedPtr(IRB, DL, StoreBasePtr,
+ APInt(DL.getPointerSizeInBits(AS), PartOffset),
+ StorePartPtrTy, StoreBasePtr->getName() + "."),
getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
// Now build a new slice for the alloca.
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 9fa43da99da9..850a01114eeb 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -20,12 +20,12 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
-#include "llvm/Transforms/Scalar/GVN.h"
-#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
-#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
using namespace llvm;
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index c0c09a7e43fe..d11855f2f3a9 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -14,12 +14,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index cde659b9d189..84675f41cdd5 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -156,27 +156,27 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/IR/IRBuilder.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace llvm::PatternMatch;
diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 0f170e26ce5f..aaab5857e0f1 100644
--- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -7,13 +7,14 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
@@ -37,7 +38,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include <algorithm>
#include <cassert>
#include <iterator>
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index 102e9eaeab77..5210f165b874 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -114,7 +114,7 @@ static bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo,
if (SuccToSinkTo->getUniquePredecessor() != Inst->getParent()) {
// We cannot sink a load across a critical edge - there may be stores in
// other code paths.
- if (!isSafeToSpeculativelyExecute(Inst))
+ if (isa<LoadInst>(Inst))
return false;
// We don't want to sink across a critical edge if we don't dominate the
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index 49ce0262c97b..486f3e5a43d4 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SCCIterator.h"
@@ -20,6 +19,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index bf54a51c7635..3e5993618c4c 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -51,13 +51,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
@@ -76,6 +75,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;