summary refs log tree commit diff
path: root/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms')
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp13
-rw-r--r--lib/Transforms/IPO/SampleProfile.cpp3
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp27
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp126
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp46
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp12
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp99
-rw-r--r--lib/Transforms/InstCombine/InstCombineInternal.h27
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp43
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp63
-rw-r--r--lib/Transforms/InstCombine/InstCombineShifts.cpp3
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp14
-rw-r--r--lib/Transforms/Instrumentation/IndirectCallPromotion.cpp7
-rw-r--r--lib/Transforms/Instrumentation/InstrProfiling.cpp230
-rw-r--r--lib/Transforms/Instrumentation/PGOInstrumentation.cpp2
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt1
-rw-r--r--lib/Transforms/Scalar/CorrelatedValuePropagation.cpp5
-rw-r--r--lib/Transforms/Scalar/GVN.cpp188
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp100
-rw-r--r--lib/Transforms/Scalar/LoadCombine.cpp295
-rw-r--r--lib/Transforms/Scalar/LoopDeletion.cpp53
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp2
-rw-r--r--lib/Transforms/Scalar/NewGVN.cpp50
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp110
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp15
-rw-r--r--lib/Transforms/Scalar/SROA.cpp15
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp1
-rw-r--r--lib/Transforms/Scalar/TailRecursionElimination.cpp52
-rw-r--r--lib/Transforms/Utils/BuildLibCalls.cpp1
-rw-r--r--lib/Transforms/Utils/Local.cpp54
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp83
-rw-r--r--lib/Transforms/Utils/LoopUnrollRuntime.cpp46
-rw-r--r--lib/Transforms/Utils/LoopUtils.cpp68
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp47
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp6
35 files changed, 1167 insertions, 740 deletions
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 4bc64ab698ff..087a8aa2c624 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -72,10 +72,6 @@ static cl::opt<bool>
RunLoopRerolling("reroll-loops", cl::Hidden,
cl::desc("Run the loop rerolling pass"));
-static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false),
- cl::Hidden,
- cl::desc("Run the load combining pass"));
-
static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
cl::desc("Run the NewGVN pass"));
@@ -174,7 +170,6 @@ PassManagerBuilder::PassManagerBuilder() {
SLPVectorize = RunSLPVectorization;
LoopVectorize = RunLoopVectorization;
RerollLoops = RunLoopRerolling;
- LoadCombine = RunLoadCombine;
NewGVN = RunNewGVN;
DisableGVNLoadPRE = false;
VerifyInput = false;
@@ -296,6 +291,8 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
InstrProfOptions Options;
if (!PGOInstrGen.empty())
Options.InstrProfileOutput = PGOInstrGen;
+ Options.DoCounterPromotion = true;
+ MPM.add(createLoopRotatePass());
MPM.add(createInstrProfilingLegacyPass(Options));
}
if (!PGOInstrUse.empty())
@@ -407,9 +404,6 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
}
}
- if (LoadCombine)
- MPM.add(createLoadCombinePass());
-
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
// Clean up after everything.
@@ -850,9 +844,6 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// alignments.
PM.add(createAlignmentFromAssumptionsPass());
- if (LoadCombine)
- PM.add(createLoadCombinePass());
-
// Cleanup and simplify the code after the scalar optimizations.
addInstructionCombiningPass(PM);
addExtensionsToPM(EP_Peephole, PM);
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp
index 67bc8f5f6b7a..656421ee58df 100644
--- a/lib/Transforms/IPO/SampleProfile.cpp
+++ b/lib/Transforms/IPO/SampleProfile.cpp
@@ -690,6 +690,9 @@ bool SampleProfileLoader::inlineHotFunctions(
for (auto I : CIS) {
InlineFunctionInfo IFI(nullptr, ACT ? &GetAssumptionCache : nullptr);
Function *CalledFunction = CallSite(I).getCalledFunction();
+ // Do not inline recursive calls.
+ if (CalledFunction == &F)
+ continue;
Instruction *DI = I;
if (!CalledFunction && !PromotedInsns.count(I) &&
CallSite(I).isIndirectCall())
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 287a5167fe2a..d5f0dd191415 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -988,15 +988,24 @@ static Instruction *foldAddWithConstant(BinaryOperator &Add,
return new ZExtInst(Builder.CreateNUWAdd(X, NewC), Ty);
}
- // Shifts and add used to flip and mask off the low bit:
- // add (ashr (shl i32 X, 31), 31), 1 --> and (not X), 1
- const APInt *C3;
- if (C->isOneValue() &&
- match(Op0,
- m_OneUse(m_AShr(m_Shl(m_Value(X), m_APInt(C2)), m_APInt(C3)))) &&
- C2 == C3 && *C2 == Ty->getScalarSizeInBits() - 1) {
- Value *NotX = Builder.CreateNot(X);
- return BinaryOperator::CreateAnd(NotX, ConstantInt::get(Ty, 1));
+ if (C->isOneValue() && Op0->hasOneUse()) {
+ // add (sext i1 X), 1 --> zext (not X)
+ // TODO: The smallest IR representation is (select X, 0, 1), and that would
+ // not require the one-use check. But we need to remove a transform in
+ // visitSelect and make sure that IR value tracking for select is equal or
+ // better than for these ops.
+ if (match(Op0, m_SExt(m_Value(X))) &&
+ X->getType()->getScalarSizeInBits() == 1)
+ return new ZExtInst(Builder.CreateNot(X), Ty);
+
+ // Shifts and add used to flip and mask off the low bit:
+ // add (ashr (shl i32 X, 31), 31), 1 --> and (not X), 1
+ const APInt *C3;
+ if (match(Op0, m_AShr(m_Shl(m_Value(X), m_APInt(C2)), m_APInt(C3))) &&
+ C2 == C3 && *C2 == Ty->getScalarSizeInBits() - 1) {
+ Value *NotX = Builder.CreateNot(X);
+ return BinaryOperator::CreateAnd(NotX, ConstantInt::get(Ty, 1));
+ }
}
return nullptr;
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index a881bda5ba98..d3d8cefe9735 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1097,20 +1097,11 @@ static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast,
Type *DestTy = Logic.getType();
Type *SrcTy = Cast->getSrcTy();
- // If the first operand is bitcast, move the logic operation ahead of the
- // bitcast (do the logic operation in the original type). This can eliminate
- // bitcasts and allow combines that would otherwise be impeded by the bitcast.
+ // Move the logic operation ahead of a zext if the constant is unchanged in
+ // the smaller source type. Performing the logic in a smaller type may provide
+ // more information to later folds, and the smaller logic instruction may be
+ // cheaper (particularly in the case of vectors).
Value *X;
- if (match(Cast, m_BitCast(m_Value(X)))) {
- Value *NewConstant = ConstantExpr::getBitCast(C, SrcTy);
- Value *NewOp = Builder->CreateBinOp(LogicOpc, X, NewConstant);
- return CastInst::CreateBitOrPointerCast(NewOp, DestTy);
- }
-
- // Similarly, move the logic operation ahead of a zext if the constant is
- // unchanged in the smaller source type. Performing the logic in a smaller
- // type may provide more information to later folds, and the smaller logic
- // instruction may be cheaper (particularly in the case of vectors).
if (match(Cast, m_OneUse(m_ZExt(m_Value(X))))) {
Constant *TruncC = ConstantExpr::getTrunc(C, SrcTy);
Constant *ZextTruncC = ConstantExpr::getZExt(TruncC, DestTy);
@@ -1239,9 +1230,10 @@ static Instruction *foldAndToXor(BinaryOperator &I,
// (A | ~B) & (B | ~A) --> ~(A ^ B)
// (~B | A) & (~A | B) --> ~(A ^ B)
// (~B | A) & (B | ~A) --> ~(A ^ B)
- if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B))))
- return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
+ if (Op0->hasOneUse() || Op1->hasOneUse())
+ if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B))))
+ return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
return nullptr;
}
@@ -1256,9 +1248,10 @@ static Instruction *foldOrToXor(BinaryOperator &I,
// Operand complexity canonicalization guarantees that the 'and' is Op0.
// (A & B) | ~(A | B) --> ~(A ^ B)
// (A & B) | ~(B | A) --> ~(A ^ B)
- if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
- match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
- return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
+ if (Op0->hasOneUse() || Op1->hasOneUse())
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
+ return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
// (A & ~B) | (~A & B) --> A ^ B
// (A & ~B) | (B & ~A) --> A ^ B
@@ -1442,13 +1435,13 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A))))
- if (Op1->hasOneUse() || cast<BinaryOperator>(Op1)->hasOneUse())
+ if (Op1->hasOneUse() || IsFreeToInvert(C, C->hasOneUse()))
return BinaryOperator::CreateAnd(Op0, Builder->CreateNot(C));
// ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C
if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B))))
if (match(Op1, m_Xor(m_Specific(B), m_Specific(A))))
- if (Op0->hasOneUse() || cast<BinaryOperator>(Op0)->hasOneUse())
+ if (Op0->hasOneUse() || IsFreeToInvert(C, C->hasOneUse()))
return BinaryOperator::CreateAnd(Op1, Builder->CreateNot(C));
// (A | B) & ((~A) ^ B) -> (A & B)
@@ -1579,11 +1572,14 @@ static Value *getSelectCondition(Value *A, Value *B,
// If A and B are sign-extended, look through the sexts to find the booleans.
Value *Cond;
+ Value *NotB;
if (match(A, m_SExt(m_Value(Cond))) &&
Cond->getType()->getScalarType()->isIntegerTy(1) &&
- match(B, m_CombineOr(m_Not(m_SExt(m_Specific(Cond))),
- m_SExt(m_Not(m_Specific(Cond))))))
- return Cond;
+ match(B, m_OneUse(m_Not(m_Value(NotB))))) {
+ NotB = peekThroughBitcast(NotB, true);
+ if (match(NotB, m_SExt(m_Specific(Cond))))
+ return Cond;
+ }
// All scalar (and most vector) possibilities should be handled now.
// Try more matches that only apply to non-splat constant vectors.
@@ -1615,12 +1611,8 @@ static Value *matchSelectFromAndOr(Value *A, Value *C, Value *B, Value *D,
// The potential condition of the select may be bitcasted. In that case, look
// through its bitcast and the corresponding bitcast of the 'not' condition.
Type *OrigType = A->getType();
- Value *SrcA, *SrcB;
- if (match(A, m_OneUse(m_BitCast(m_Value(SrcA)))) &&
- match(B, m_OneUse(m_BitCast(m_Value(SrcB))))) {
- A = SrcA;
- B = SrcB;
- }
+ A = peekThroughBitcast(A, true);
+ B = peekThroughBitcast(B, true);
if (Value *Cond = getSelectCondition(A, B, Builder)) {
// ((bc Cond) & C) | ((bc ~Cond) & D) --> bc (select Cond, (bc C), (bc D))
@@ -1922,8 +1914,9 @@ Value *InstCombiner::foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
/// (A & C1) | B
///
/// when the XOR of the two constants is "all ones" (-1).
-Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
- Value *A, Value *B, Value *C) {
+static Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,
+ Value *A, Value *B, Value *C,
+ InstCombiner::BuilderTy *Builder) {
ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
if (!CI1) return nullptr;
@@ -1944,15 +1937,16 @@ Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
/// \brief This helper function folds:
///
-/// ((A | B) & C1) ^ (B & C2)
+/// ((A ^ B) & C1) | (B & C2)
///
/// into:
///
/// (A & C1) ^ B
///
/// when the XOR of the two constants is "all ones" (-1).
-Instruction *InstCombiner::FoldXorWithConstants(BinaryOperator &I, Value *Op,
- Value *A, Value *B, Value *C) {
+static Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op,
+ Value *A, Value *B, Value *C,
+ InstCombiner::BuilderTy *Builder) {
ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
if (!CI1)
return nullptr;
@@ -2112,46 +2106,36 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
}
// ((A|B)&1)|(B&-2) -> (A&1) | B
- if (match(A, m_Or(m_Value(V1), m_Specific(B))) ||
- match(A, m_Or(m_Specific(B), m_Value(V1)))) {
- Instruction *Ret = FoldOrWithConstants(I, Op1, V1, B, C);
- if (Ret) return Ret;
+ if (match(A, m_c_Or(m_Value(V1), m_Specific(B)))) {
+ if (Instruction *Ret = FoldOrWithConstants(I, Op1, V1, B, C, Builder))
+ return Ret;
}
// (B&-2)|((A|B)&1) -> (A&1) | B
- if (match(B, m_Or(m_Specific(A), m_Value(V1))) ||
- match(B, m_Or(m_Value(V1), m_Specific(A)))) {
- Instruction *Ret = FoldOrWithConstants(I, Op0, A, V1, D);
- if (Ret) return Ret;
+ if (match(B, m_c_Or(m_Specific(A), m_Value(V1)))) {
+ if (Instruction *Ret = FoldOrWithConstants(I, Op0, A, V1, D, Builder))
+ return Ret;
}
// ((A^B)&1)|(B&-2) -> (A&1) ^ B
- if (match(A, m_Xor(m_Value(V1), m_Specific(B))) ||
- match(A, m_Xor(m_Specific(B), m_Value(V1)))) {
- Instruction *Ret = FoldXorWithConstants(I, Op1, V1, B, C);
- if (Ret) return Ret;
+ if (match(A, m_c_Xor(m_Value(V1), m_Specific(B)))) {
+ if (Instruction *Ret = FoldXorWithConstants(I, Op1, V1, B, C, Builder))
+ return Ret;
}
// (B&-2)|((A^B)&1) -> (A&1) ^ B
- if (match(B, m_Xor(m_Specific(A), m_Value(V1))) ||
- match(B, m_Xor(m_Value(V1), m_Specific(A)))) {
- Instruction *Ret = FoldXorWithConstants(I, Op0, A, V1, D);
- if (Ret) return Ret;
+ if (match(B, m_c_Xor(m_Specific(A), m_Value(V1)))) {
+ if (Instruction *Ret = FoldXorWithConstants(I, Op0, A, V1, D, Builder))
+ return Ret;
}
}
// (A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C
- // FIXME: The two hasOneUse calls here are the same call, maybe we were
- // supposed to check Op1->operand(0)?
if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A))))
- if (Op1->hasOneUse() || cast<BinaryOperator>(Op1)->hasOneUse())
- return BinaryOperator::CreateOr(Op0, C);
+ return BinaryOperator::CreateOr(Op0, C);
// ((A ^ C) ^ B) | (B ^ A) -> (B ^ A) | C
- // FIXME: The two hasOneUse calls here are the same call, maybe we were
- // supposed to check Op0->operand(0)?
if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B))))
if (match(Op1, m_Xor(m_Specific(B), m_Specific(A))))
- if (Op0->hasOneUse() || cast<BinaryOperator>(Op0)->hasOneUse())
- return BinaryOperator::CreateOr(Op1, C);
+ return BinaryOperator::CreateOr(Op1, C);
// ((B | C) & A) | B -> B | (A & C)
if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A))))
@@ -2357,6 +2341,30 @@ Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
}
}
+ // Instead of trying to imitate the folds for and/or, decompose this 'xor'
+ // into those logic ops. That is, try to turn this into an and-of-icmps
+ // because we have many folds for that pattern.
+ //
+ // This is based on a truth table definition of xor:
+ // X ^ Y --> (X | Y) & !(X & Y)
+ if (Value *OrICmp = SimplifyBinOp(Instruction::Or, LHS, RHS, SQ)) {
+ // TODO: If OrICmp is true, then the definition of xor simplifies to !(X&Y).
+ // TODO: If OrICmp is false, the whole thing is false (InstSimplify?).
+ if (Value *AndICmp = SimplifyBinOp(Instruction::And, LHS, RHS, SQ)) {
+ // TODO: Independently handle cases where the 'and' side is a constant.
+ if (OrICmp == LHS && AndICmp == RHS && RHS->hasOneUse()) {
+ // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS
+ RHS->setPredicate(RHS->getInversePredicate());
+ return Builder->CreateAnd(LHS, RHS);
+ }
+ if (OrICmp == RHS && AndICmp == LHS && LHS->hasOneUse()) {
+ // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS
+ LHS->setPredicate(LHS->getInversePredicate());
+ return Builder->CreateAnd(LHS, RHS);
+ }
+ }
+ }
+
return nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c0830a5d2112..dbed7ad4eae8 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1409,6 +1409,47 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
}
}
+ // Add range metadata since known bits can't completely reflect what we know.
+ // TODO: Handle splat vectors.
+ auto *IT = dyn_cast<IntegerType>(Op0->getType());
+ if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
+ Metadata *LowAndHigh[] = {
+ ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
+ ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
+ II.setMetadata(LLVMContext::MD_range,
+ MDNode::get(II.getContext(), LowAndHigh));
+ return &II;
+ }
+
+ return nullptr;
+}
+
+static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
+ assert(II.getIntrinsicID() == Intrinsic::ctpop &&
+ "Expected ctpop intrinsic");
+ Value *Op0 = II.getArgOperand(0);
+ // FIXME: Try to simplify vectors of integers.
+ auto *IT = dyn_cast<IntegerType>(Op0->getType());
+ if (!IT)
+ return nullptr;
+
+ unsigned BitWidth = IT->getBitWidth();
+ KnownBits Known(BitWidth);
+ IC.computeKnownBits(Op0, Known, 0, &II);
+
+ unsigned MinCount = Known.countMinPopulation();
+ unsigned MaxCount = Known.countMaxPopulation();
+
+ // Add range metadata since known bits can't completely reflect what we know.
+ if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
+ Metadata *LowAndHigh[] = {
+ ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
+ ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
+ II.setMetadata(LLVMContext::MD_range,
+ MDNode::get(II.getContext(), LowAndHigh));
+ return &II;
+ }
+
return nullptr;
}
@@ -1981,6 +2022,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return I;
break;
+ case Intrinsic::ctpop:
+ if (auto *I = foldCtpop(*II, *this))
+ return I;
+ break;
+
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
case Intrinsic::umul_with_overflow:
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 38e95fb11639..d3049389dfb9 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1896,6 +1896,18 @@ static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
}
+ // Canonicalize vector bitcasts to come before vector bitwise logic with a
+ // constant. This eases recognition of special constants for later ops.
+ // Example:
+ // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
+ Constant *C;
+ if (match(BO->getOperand(1), m_Constant(C))) {
+ // bitcast (logic X, C) --> logic (bitcast X, C')
+ Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
+ Value *CastedC = ConstantExpr::getBitCast(C, DestTy);
+ return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
+ }
+
return nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 1ef4acfb058c..6ad32490a328 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2434,6 +2434,77 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
return nullptr;
}
+bool InstCombiner::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS,
+ Value *&RHS, ConstantInt *&Less,
+ ConstantInt *&Equal,
+ ConstantInt *&Greater) {
+ // TODO: Generalize this to work with other comparison idioms or ensure
+ // they get canonicalized into this form.
+
+ // select i1 (a == b), i32 Equal, i32 (select i1 (a < b), i32 Less, i32
+ // Greater), where Equal, Less and Greater are placeholders for any three
+ // constants.
+ ICmpInst::Predicate PredA, PredB;
+ if (match(SI->getTrueValue(), m_ConstantInt(Equal)) &&
+ match(SI->getCondition(), m_ICmp(PredA, m_Value(LHS), m_Value(RHS))) &&
+ PredA == ICmpInst::ICMP_EQ &&
+ match(SI->getFalseValue(),
+ m_Select(m_ICmp(PredB, m_Specific(LHS), m_Specific(RHS)),
+ m_ConstantInt(Less), m_ConstantInt(Greater))) &&
+ PredB == ICmpInst::ICMP_SLT) {
+ return true;
+ }
+ return false;
+}
+
+Instruction *InstCombiner::foldICmpSelectConstant(ICmpInst &Cmp,
+ Instruction *Select,
+ ConstantInt *C) {
+
+ assert(C && "Cmp RHS should be a constant int!");
+ // If we're testing a constant value against the result of a three way
+ // comparison, the result can be expressed directly in terms of the
+ // original values being compared. Note: We could possibly be more
+ // aggressive here and remove the hasOneUse test. The original select is
+ // really likely to simplify or sink when we remove a test of the result.
+ Value *OrigLHS, *OrigRHS;
+ ConstantInt *C1LessThan, *C2Equal, *C3GreaterThan;
+ if (Cmp.hasOneUse() &&
+ matchThreeWayIntCompare(cast<SelectInst>(Select), OrigLHS, OrigRHS,
+ C1LessThan, C2Equal, C3GreaterThan)) {
+ assert(C1LessThan && C2Equal && C3GreaterThan);
+
+ bool TrueWhenLessThan =
+ ConstantExpr::getCompare(Cmp.getPredicate(), C1LessThan, C)
+ ->isAllOnesValue();
+ bool TrueWhenEqual =
+ ConstantExpr::getCompare(Cmp.getPredicate(), C2Equal, C)
+ ->isAllOnesValue();
+ bool TrueWhenGreaterThan =
+ ConstantExpr::getCompare(Cmp.getPredicate(), C3GreaterThan, C)
+ ->isAllOnesValue();
+
+ // This generates the new instruction that will replace the original Cmp
+ // Instruction. Instead of enumerating the various combinations when
+ // TrueWhenLessThan, TrueWhenEqual and TrueWhenGreaterThan are true versus
+ // false, we rely on chaining of ORs and future passes of InstCombine to
+ // simplify the OR further (i.e. a s< b || a == b becomes a s<= b).
+
+ // When none of the three constants satisfy the predicate for the RHS (C),
+ // the entire original Cmp can be simplified to a false.
+ Value *Cond = Builder->getFalse();
+ if (TrueWhenLessThan)
+ Cond = Builder->CreateOr(Cond, Builder->CreateICmp(ICmpInst::ICMP_SLT, OrigLHS, OrigRHS));
+ if (TrueWhenEqual)
+ Cond = Builder->CreateOr(Cond, Builder->CreateICmp(ICmpInst::ICMP_EQ, OrigLHS, OrigRHS));
+ if (TrueWhenGreaterThan)
+ Cond = Builder->CreateOr(Cond, Builder->CreateICmp(ICmpInst::ICMP_SGT, OrigLHS, OrigRHS));
+
+ return replaceInstUsesWith(Cmp, Cond);
+ }
+ return nullptr;
+}
+
/// Try to fold integer comparisons with a constant operand: icmp Pred X, C
/// where X is some kind of instruction.
Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) {
@@ -2493,11 +2564,28 @@ Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) {
return I;
}
+ // Match against CmpInst LHS being instructions other than binary operators.
Instruction *LHSI;
- if (match(Cmp.getOperand(0), m_Instruction(LHSI)) &&
- LHSI->getOpcode() == Instruction::Trunc)
- if (Instruction *I = foldICmpTruncConstant(Cmp, LHSI, C))
- return I;
+ if (match(Cmp.getOperand(0), m_Instruction(LHSI))) {
+ switch (LHSI->getOpcode()) {
+ case Instruction::Select:
+ {
+ // For now, we only support constant integers while folding the
+ // ICMP(SELECT)) pattern. We can extend this to support vector of integers
+ // similar to the cases handled by binary ops above.
+ if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(Cmp.getOperand(1)))
+ if (Instruction *I = foldICmpSelectConstant(Cmp, LHSI, ConstRHS))
+ return I;
+ break;
+ }
+ case Instruction::Trunc:
+ if (Instruction *I = foldICmpTruncConstant(Cmp, LHSI, C))
+ return I;
+ break;
+ default:
+ break;
+ }
+ }
if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, C))
return I;
@@ -3110,8 +3198,7 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
if (BO0) {
// Transform A & (L - 1) `ult` L --> L != 0
auto LSubOne = m_Add(m_Specific(Op1), m_AllOnes());
- auto BitwiseAnd =
- m_CombineOr(m_And(m_Value(), LSubOne), m_And(LSubOne, m_Value()));
+ auto BitwiseAnd = m_c_And(m_Value(), LSubOne);
if (match(BO0, BitwiseAnd) && Pred == ICmpInst::ICMP_ULT) {
auto *Zero = Constant::getNullValue(BO0->getType());
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 1a7db146df42..1b0fe84dd4dd 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -95,6 +95,18 @@ static inline bool isCanonicalPredicate(CmpInst::Predicate Pred) {
}
}
+/// Return the source operand of a potentially bitcasted value while optionally
+/// checking if it has one use. If there is no bitcast or the one use check is
+/// not met, return the input value itself.
+static inline Value *peekThroughBitcast(Value *V, bool OneUseOnly = false) {
+ if (auto *BitCast = dyn_cast<BitCastInst>(V))
+ if (!OneUseOnly || BitCast->hasOneUse())
+ return BitCast->getOperand(0);
+
+ // V is not a bitcast or V has more than one use and OneUseOnly is true.
+ return V;
+}
+
/// \brief Add one to a Constant
static inline Constant *AddOne(Constant *C) {
return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
@@ -276,10 +288,6 @@ public:
Instruction *visitFDiv(BinaryOperator &I);
Value *simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, bool Inverted);
Instruction *visitAnd(BinaryOperator &I);
- Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A,
- Value *B, Value *C);
- Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op, Value *A,
- Value *B, Value *C);
Instruction *visitOr(BinaryOperator &I);
Instruction *visitXor(BinaryOperator &I);
Instruction *visitShl(BinaryOperator &I);
@@ -595,6 +603,15 @@ private:
Instruction::BinaryOps, Value *, Value *, Value *,
Value *);
+ /// Match a select chain which produces one of three values based on whether
+ /// the LHS is less than, equal to, or greater than RHS respectively.
+ /// Return true if we matched a three way compare idiom. The LHS, RHS, Less,
+ /// Equal and Greater values are saved in the matching process and returned to
+ /// the caller.
+ bool matchThreeWayIntCompare(SelectInst *SI, Value *&LHS, Value *&RHS,
+ ConstantInt *&Less, ConstantInt *&Equal,
+ ConstantInt *&Greater);
+
/// \brief Attempts to replace V with a simpler value based on the demanded
/// bits.
Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownBits &Known,
@@ -672,6 +689,8 @@ private:
Instruction *foldICmpBinOp(ICmpInst &Cmp);
Instruction *foldICmpEquality(ICmpInst &Cmp);
+ Instruction *foldICmpSelectConstant(ICmpInst &Cmp, Instruction *Select,
+ ConstantInt *C);
Instruction *foldICmpTruncConstant(ICmpInst &Cmp, Instruction *Trunc,
const APInt *C);
Instruction *foldICmpAndConstant(ICmpInst &Cmp, BinaryOperator *And,
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index a4d84ae81aa0..ca370c73fca4 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -169,6 +169,18 @@ isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
return nullptr;
}
+/// Returns true if V is dereferenceable for size of alloca.
+static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
+ const DataLayout &DL) {
+ if (AI->isArrayAllocation())
+ return false;
+ uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
+ if (!AllocaSize)
+ return false;
+ return isDereferenceableAndAlignedPointer(V, AI->getAlignment(),
+ APInt(64, AllocaSize), DL);
+}
+
static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
// Check for array size of 1 (scalar allocation).
if (!AI.isArrayAllocation()) {
@@ -390,7 +402,8 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
unsigned SourceAlign = getOrEnforceKnownAlignment(
Copy->getSource(), AI.getAlignment(), DL, &AI, &AC, &DT);
- if (AI.getAlignment() <= SourceAlign) {
+ if (AI.getAlignment() <= SourceAlign &&
+ isDereferenceableForAllocaSize(Copy->getSource(), &AI, DL)) {
DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
@@ -476,21 +489,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
break;
case LLVMContext::MD_nonnull:
- // This only directly applies if the new type is also a pointer.
- if (NewTy->isPointerTy()) {
- NewLoad->setMetadata(ID, N);
- break;
- }
- // If it's integral now, translate it to !range metadata.
- if (NewTy->isIntegerTy()) {
- auto *ITy = cast<IntegerType>(NewTy);
- auto *NullInt = ConstantExpr::getPtrToInt(
- ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
- auto *NonNullInt =
- ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
- NewLoad->setMetadata(LLVMContext::MD_range,
- MDB.createRange(NonNullInt, NullInt));
- }
+ copyNonnullMetadata(LI, N, *NewLoad);
break;
case LLVMContext::MD_align:
case LLVMContext::MD_dereferenceable:
@@ -500,17 +499,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
NewLoad->setMetadata(ID, N);
break;
case LLVMContext::MD_range:
- // FIXME: It would be nice to propagate this in some way, but the type
- // conversions make it hard.
-
- // If it's a pointer now and the range does not contain 0, make it !nonnull.
- if (NewTy->isPointerTy()) {
- unsigned BitWidth = IC.getDataLayout().getTypeSizeInBits(NewTy);
- if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
- MDNode *NN = MDNode::get(LI.getContext(), None);
- NewLoad->setMetadata(LLVMContext::MD_nonnull, NN);
- }
- }
+ copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad);
break;
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index b9674d85634d..33951e66497a 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -303,7 +303,7 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
/// We want to turn:
/// (select (icmp eq (and X, C1), 0), Y, (or Y, C2))
/// into:
-/// (or (shl (and X, C1), C3), y)
+/// (or (shl (and X, C1), C3), Y)
/// iff:
/// C1 and C2 are both powers of 2
/// where:
@@ -317,19 +317,44 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
Value *FalseVal,
InstCombiner::BuilderTy *Builder) {
const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
- if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy())
+ if (!IC || !SI.getType()->isIntegerTy())
return nullptr;
Value *CmpLHS = IC->getOperand(0);
Value *CmpRHS = IC->getOperand(1);
- if (!match(CmpRHS, m_Zero()))
- return nullptr;
+ Value *V;
+ unsigned C1Log;
+ bool IsEqualZero;
+ bool NeedAnd = false;
+ if (IC->isEquality()) {
+ if (!match(CmpRHS, m_Zero()))
+ return nullptr;
+
+ const APInt *C1;
+ if (!match(CmpLHS, m_And(m_Value(), m_Power2(C1))))
+ return nullptr;
+
+ V = CmpLHS;
+ C1Log = C1->logBase2();
+ IsEqualZero = IC->getPredicate() == ICmpInst::ICMP_EQ;
+ } else if (IC->getPredicate() == ICmpInst::ICMP_SLT ||
+ IC->getPredicate() == ICmpInst::ICMP_SGT) {
+ // We also need to recognize (icmp slt (trunc (X)), 0) and
+ // (icmp sgt (trunc (X)), -1).
+ IsEqualZero = IC->getPredicate() == ICmpInst::ICMP_SGT;
+ if ((IsEqualZero && !match(CmpRHS, m_AllOnes())) ||
+ (!IsEqualZero && !match(CmpRHS, m_Zero())))
+ return nullptr;
+
+ if (!match(CmpLHS, m_OneUse(m_Trunc(m_Value(V)))))
+ return nullptr;
- Value *X;
- const APInt *C1;
- if (!match(CmpLHS, m_And(m_Value(X), m_Power2(C1))))
+ C1Log = CmpLHS->getType()->getScalarSizeInBits() - 1;
+ NeedAnd = true;
+ } else {
return nullptr;
+ }
const APInt *C2;
bool OrOnTrueVal = false;
@@ -340,11 +365,27 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
if (!OrOnFalseVal && !OrOnTrueVal)
return nullptr;
- Value *V = CmpLHS;
Value *Y = OrOnFalseVal ? TrueVal : FalseVal;
- unsigned C1Log = C1->logBase2();
unsigned C2Log = C2->logBase2();
+
+ bool NeedXor = (!IsEqualZero && OrOnFalseVal) || (IsEqualZero && OrOnTrueVal);
+ bool NeedShift = C1Log != C2Log;
+ bool NeedZExtTrunc = Y->getType()->getIntegerBitWidth() !=
+ V->getType()->getIntegerBitWidth();
+
+ // Make sure we don't create more instructions than we save.
+ Value *Or = OrOnFalseVal ? FalseVal : TrueVal;
+ if ((NeedShift + NeedXor + NeedZExtTrunc) >
+ (IC->hasOneUse() + Or->hasOneUse()))
+ return nullptr;
+
+ if (NeedAnd) {
+ // Insert the AND instruction on the input to the truncate.
+ APInt C1 = APInt::getOneBitSet(V->getType()->getScalarSizeInBits(), C1Log);
+ V = Builder->CreateAnd(V, ConstantInt::get(V->getType(), C1));
+ }
+
if (C2Log > C1Log) {
V = Builder->CreateZExtOrTrunc(V, Y->getType());
V = Builder->CreateShl(V, C2Log - C1Log);
@@ -354,9 +395,7 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
} else
V = Builder->CreateZExtOrTrunc(V, Y->getType());
- ICmpInst::Predicate Pred = IC->getPredicate();
- if ((Pred == ICmpInst::ICMP_NE && OrOnFalseVal) ||
- (Pred == ICmpInst::ICMP_EQ && OrOnTrueVal))
+ if (NeedXor)
V = Builder->CreateXor(V, *C2);
return Builder->CreateOr(V, Y);
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 8cec865c6422..1bb1a85367d1 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -556,8 +556,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
// The inexact versions are deferred to DAGCombine, so we don't hide shl
// behind a bit mask.
const APInt *ShOp1;
- if (match(Op0, m_CombineOr(m_Exact(m_LShr(m_Value(X), m_APInt(ShOp1))),
- m_Exact(m_AShr(m_Value(X), m_APInt(ShOp1)))))) {
+ if (match(Op0, m_Exact(m_Shr(m_Value(X), m_APInt(ShOp1))))) {
unsigned ShrAmt = ShOp1->getZExtValue();
if (ShrAmt < ShAmt) {
// If C1 < C2: (X >>?,exact C1) << C2 --> X << (C2 - C1)
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 65e6d2e35905..02fac4fb37a4 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -939,9 +939,19 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
// `TrueVInPred`.
if (InC && !isa<ConstantExpr>(InC) && isa<ConstantInt>(InC))
InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
- else
+ else {
+ // Generate the select in the same block as PN's current incoming block.
+ // Note: ThisBB need not be the NonConstBB because vector constants
+ // which are constants by definition are handled here.
+ // FIXME: This can lead to an increase in IR generation because we might
+ // generate selects for a vector constant phi operand that could not be
+ // folded to TrueVInPred or FalseVInPred as done for ConstantInt. For
+ // non-vector phis, this transformation was always profitable because
+ // the select would be generated exactly once in the NonConstBB.
+ Builder->SetInsertPoint(ThisBB->getTerminator());
InV = Builder->CreateSelect(PN->getIncomingValue(i),
TrueVInPred, FalseVInPred, "phitmp");
+ }
NewPN->addIncoming(InV, ThisBB);
}
} else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) {
@@ -3002,6 +3012,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
++NumDeadInst;
DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
Inst->eraseFromParent();
+ MadeIRChange = true;
continue;
}
@@ -3015,6 +3026,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
++NumConstProp;
if (isInstructionTriviallyDead(Inst, TLI))
Inst->eraseFromParent();
+ MadeIRChange = true;
continue;
}
diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 0d308810009d..4089d81ea3e1 100644
--- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -642,7 +642,12 @@ static bool promoteIndirectCalls(Module &M, bool InLTO, bool SamplePGO) {
if (DisableICP)
return false;
InstrProfSymtab Symtab;
- Symtab.create(M, InLTO);
+ if (Error E = Symtab.create(M, InLTO)) {
+ std::string SymtabFailure = toString(std::move(E));
+ DEBUG(dbgs() << "Failed to create symtab: " << SymtabFailure << "\n");
+ (void)SymtabFailure;
+ return false;
+ }
bool Changed = false;
for (auto &F : M) {
if (F.isDeclaration())
diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 37f88d5f95f1..9c14b0149fdc 100644
--- a/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -19,12 +19,14 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -40,7 +42,10 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -92,6 +97,35 @@ cl::opt<double> NumCountersPerValueSite(
// is usually smaller than 2.
cl::init(1.0));
+cl::opt<bool> AtomicCounterUpdatePromoted(
+ "atomic-counter-update-promoted", cl::ZeroOrMore,
+ cl::desc("Do counter update using atomic fetch add "
+ " for promoted counters only"),
+ cl::init(false));
+
+// If the option is not specified, the default behavior about whether
+// counter promotion is done depends on how instrumentation lowering
+// pipeline is setup, i.e., the default value of true of this option
+// does not mean the promotion will be done by default. Explicitly
+// setting this option can override the default behavior.
+cl::opt<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore,
+ cl::desc("Do counter register promotion"),
+ cl::init(false));
+cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
+ cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(10),
+ cl::desc("Max number counter promotions per loop to avoid"
+ " increasing register pressure too much"));
+
+// A debug option
+cl::opt<int>
+ MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1),
+ cl::desc("Max number of allowed counter promotions"));
+
+cl::opt<bool> SpeculativeCounterPromotion(
+ cl::ZeroOrMore, "speculative-counter-promotion", cl::init(false),
+ cl::desc("Allow counter promotion for loops with multiple exiting blocks "
+ " or top-tested loops. "));
+
class InstrProfilingLegacyPass : public ModulePass {
InstrProfiling InstrProf;
@@ -116,6 +150,123 @@ public:
}
};
+/// A helper class to promote one counter RMW operation in the loop
+/// into register update.
+///
+/// RMW update for the counter will be sunk out of the loop after
+/// the transformation.
+///
+class PGOCounterPromoterHelper : public LoadAndStorePromoter {
+public:
+ PGOCounterPromoterHelper(Instruction *L, Instruction *S, SSAUpdater &SSA,
+ Value *Init, BasicBlock *PH,
+ ArrayRef<BasicBlock *> ExitBlocks,
+ ArrayRef<Instruction *> InsertPts)
+ : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
+ InsertPts(InsertPts) {
+ assert(isa<LoadInst>(L));
+ assert(isa<StoreInst>(S));
+ SSA.AddAvailableValue(PH, Init);
+ }
+ void doExtraRewritesBeforeFinalDeletion() const override {
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+ Instruction *InsertPos = InsertPts[i];
+ // Get LiveIn value into the ExitBlock. If there are multiple
+ // predecessors, the value is defined by a PHI node in this
+ // block.
+ Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+ Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
+ IRBuilder<> Builder(InsertPos);
+ if (AtomicCounterUpdatePromoted)
+ Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
+ AtomicOrdering::SequentiallyConsistent);
+ else {
+ LoadInst *OldVal = Builder.CreateLoad(Addr, "pgocount.promoted");
+ auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
+ Builder.CreateStore(NewVal, Addr);
+ }
+ }
+ }
+
+private:
+ Instruction *Store;
+ ArrayRef<BasicBlock *> ExitBlocks;
+ ArrayRef<Instruction *> InsertPts;
+};
+
+/// A helper class to do register promotion for all profile counter
+/// updates in a loop.
+///
+class PGOCounterPromoter {
+public:
+ PGOCounterPromoter(ArrayRef<LoadStorePair> Cands, Loop &Loop)
+ : Candidates(Cands), ExitBlocks(), InsertPts(), ParentLoop(Loop) {
+
+ SmallVector<BasicBlock *, 8> LoopExitBlocks;
+ SmallPtrSet<BasicBlock *, 8> BlockSet;
+ ParentLoop.getExitBlocks(LoopExitBlocks);
+
+ for (BasicBlock *ExitBlock : LoopExitBlocks) {
+ if (BlockSet.insert(ExitBlock).second) {
+ ExitBlocks.push_back(ExitBlock);
+ InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
+ }
+ }
+ }
+
+ bool run(int64_t *NumPromoted) {
+ // We can't insert into a catchswitch.
+ bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
+ return isa<CatchSwitchInst>(Exit->getTerminator());
+ });
+
+ if (HasCatchSwitch)
+ return false;
+
+ if (!ParentLoop.hasDedicatedExits())
+ return false;
+
+ BasicBlock *PH = ParentLoop.getLoopPreheader();
+ if (!PH)
+ return false;
+
+ BasicBlock *H = ParentLoop.getHeader();
+ bool TopTested =
+ ((ParentLoop.getBlocks().size() > 1) && ParentLoop.isLoopExiting(H));
+ if (!SpeculativeCounterPromotion &&
+ (TopTested || ParentLoop.getExitingBlock() == nullptr))
+ return false;
+
+ unsigned Promoted = 0;
+ for (auto &Cand : Candidates) {
+
+ SmallVector<PHINode *, 4> NewPHIs;
+ SSAUpdater SSA(&NewPHIs);
+ Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
+ PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
+ PH, ExitBlocks, InsertPts);
+ Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
+ Promoted++;
+ if (Promoted >= MaxNumOfPromotionsPerLoop)
+ break;
+ (*NumPromoted)++;
+ if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
+ break;
+ }
+
+ DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
+ << ParentLoop.getLoopDepth() << ")\n");
+ return Promoted != 0;
+ }
+
+private:
+ ArrayRef<LoadStorePair> Candidates;
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ SmallVector<Instruction *, 8> InsertPts;
+ Loop &ParentLoop;
+};
+
} // end anonymous namespace
PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
@@ -147,6 +298,63 @@ static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
return dyn_cast<InstrProfIncrementInst>(Instr);
}
+bool InstrProfiling::lowerIntrinsics(Function *F) {
+ bool MadeChange = false;
+ PromotionCandidates.clear();
+ for (BasicBlock &BB : *F) {
+ for (auto I = BB.begin(), E = BB.end(); I != E;) {
+ auto Instr = I++;
+ InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr);
+ if (Inc) {
+ lowerIncrement(Inc);
+ MadeChange = true;
+ } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
+ lowerValueProfileInst(Ind);
+ MadeChange = true;
+ }
+ }
+ }
+
+ if (!MadeChange)
+ return false;
+
+ promoteCounterLoadStores(F);
+ return true;
+}
+
+bool InstrProfiling::isCounterPromotionEnabled() const {
+ if (DoCounterPromotion.getNumOccurrences() > 0)
+ return DoCounterPromotion;
+
+ return Options.DoCounterPromotion;
+}
+
+void InstrProfiling::promoteCounterLoadStores(Function *F) {
+ if (!isCounterPromotionEnabled())
+ return;
+
+ DominatorTree DT(*F);
+ LoopInfo LI(DT);
+ DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
+
+ for (const auto &LoadStore : PromotionCandidates) {
+ auto *CounterLoad = LoadStore.first;
+ auto *CounterStore = LoadStore.second;
+ BasicBlock *BB = CounterLoad->getParent();
+ Loop *ParentLoop = LI.getLoopFor(BB);
+ if (!ParentLoop)
+ continue;
+ LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore);
+ }
+
+ SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();
+
+ for (auto *Loop : Loops) {
+ PGOCounterPromoter Promoter(LoopPromotionCandidates[Loop], *Loop);
+ Promoter.run(&TotalCountersPromoted);
+ }
+}
+
bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) {
bool MadeChange = false;
@@ -179,18 +387,7 @@ bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) {
}
for (Function &F : M)
- for (BasicBlock &BB : F)
- for (auto I = BB.begin(), E = BB.end(); I != E;) {
- auto Instr = I++;
- InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr);
- if (Inc) {
- lowerIncrement(Inc);
- MadeChange = true;
- } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
- lowerValueProfileInst(Ind);
- MadeChange = true;
- }
- }
+ MadeChange |= lowerIntrinsics(&F);
if (GlobalVariable *CoverageNamesVar =
M.getNamedGlobal(getCoverageUnusedNamesVarName())) {
@@ -303,9 +500,12 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
IRBuilder<> Builder(Inc);
uint64_t Index = Inc->getIndex()->getZExtValue();
Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
- Value *Count = Builder.CreateLoad(Addr, "pgocount");
- Count = Builder.CreateAdd(Count, Inc->getStep());
- Inc->replaceAllUsesWith(Builder.CreateStore(Count, Addr));
+ Value *Load = Builder.CreateLoad(Addr, "pgocount");
+ auto *Count = Builder.CreateAdd(Load, Inc->getStep());
+ auto *Store = Builder.CreateStore(Count, Addr);
+ Inc->replaceAllUsesWith(Store);
+ if (isCounterPromotionEnabled())
+ PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
Inc->eraseFromParent();
}
diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index b2d95271479c..0e7d11c55397 100644
--- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -1177,7 +1177,7 @@ void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) {
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
{llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
- Builder.getInt64(FuncHash), Builder.CreatePtrToInt(Length, Int64Ty),
+ Builder.getInt64(FuncHash), Builder.CreateZExtOrTrunc(Length, Int64Ty),
Builder.getInt32(IPVK_MemOPSize), Builder.getInt32(CurCtrId)});
++CurCtrId;
}
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index f5196cc46181..457c9427ab9a 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -22,7 +22,6 @@ add_llvm_library(LLVMScalarOpts
LICM.cpp
LoopAccessAnalysisPrinter.cpp
LoopSink.cpp
- LoadCombine.cpp
LoopDeletion.cpp
LoopDataPrefetch.cpp
LoopDistribute.cpp
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 2a4c9526dfcd..28157783daa7 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -232,8 +232,7 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) {
pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
if (PB == PE) return false;
- // Analyse each switch case in turn. This is done in reverse order so that
- // removing a case doesn't cause trouble for the iteration.
+ // Analyse each switch case in turn.
bool Changed = false;
for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
ConstantInt *Case = CI->getCaseValue();
@@ -291,7 +290,7 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) {
break;
}
- // Increment the case iterator sense we didn't delete it.
+ // Increment the case iterator since we didn't delete it.
++CI;
}
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 0490d93f6455..c0f628eb61e6 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -80,9 +80,10 @@ MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
struct llvm::GVN::Expression {
uint32_t opcode;
Type *type;
+ bool commutative;
SmallVector<uint32_t, 4> varargs;
- Expression(uint32_t o = ~2U) : opcode(o) {}
+ Expression(uint32_t o = ~2U) : opcode(o), commutative(false) {}
bool operator==(const Expression &other) const {
if (opcode != other.opcode)
@@ -246,6 +247,7 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
if (e.varargs[0] > e.varargs[1])
std::swap(e.varargs[0], e.varargs[1]);
+ e.commutative = true;
}
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
@@ -256,6 +258,7 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
e.opcode = (C->getOpcode() << 8) | Predicate;
+ e.commutative = true;
} else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
II != IE; ++II)
@@ -281,6 +284,7 @@ GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode,
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
e.opcode = (Opcode << 8) | Predicate;
+ e.commutative = true;
return e;
}
@@ -348,25 +352,25 @@ GVN::ValueTable::~ValueTable() = default;
/// add - Insert a value into the table with a specified value number.
void GVN::ValueTable::add(Value *V, uint32_t num) {
valueNumbering.insert(std::make_pair(V, num));
+ if (PHINode *PN = dyn_cast<PHINode>(V))
+ NumberingPhi[num] = PN;
}
uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
if (AA->doesNotAccessMemory(C)) {
Expression exp = createExpr(C);
- uint32_t &e = expressionNumbering[exp];
- if (!e) e = nextValueNumber++;
+ uint32_t e = assignExpNewValueNum(exp).first;
valueNumbering[C] = e;
return e;
} else if (AA->onlyReadsMemory(C)) {
Expression exp = createExpr(C);
- uint32_t &e = expressionNumbering[exp];
- if (!e) {
- e = nextValueNumber++;
- valueNumbering[C] = e;
- return e;
+ auto ValNum = assignExpNewValueNum(exp);
+ if (ValNum.second) {
+ valueNumbering[C] = ValNum.first;
+ return ValNum.first;
}
if (!MD) {
- e = nextValueNumber++;
+ uint32_t e = assignExpNewValueNum(exp).first;
valueNumbering[C] = e;
return e;
}
@@ -522,23 +526,29 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
case Instruction::ExtractValue:
exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
break;
+ case Instruction::PHI:
+ valueNumbering[V] = nextValueNumber;
+ NumberingPhi[nextValueNumber] = cast<PHINode>(V);
+ return nextValueNumber++;
default:
valueNumbering[V] = nextValueNumber;
return nextValueNumber++;
}
- uint32_t& e = expressionNumbering[exp];
- if (!e) e = nextValueNumber++;
+ uint32_t e = assignExpNewValueNum(exp).first;
valueNumbering[V] = e;
return e;
}
/// Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
-uint32_t GVN::ValueTable::lookup(Value *V) const {
+uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const {
DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
- assert(VI != valueNumbering.end() && "Value not numbered?");
- return VI->second;
+ if (Verify) {
+ assert(VI != valueNumbering.end() && "Value not numbered?");
+ return VI->second;
+ }
+ return (VI != valueNumbering.end()) ? VI->second : 0;
}
/// Returns the value number of the given comparison,
@@ -549,21 +559,28 @@ uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode,
CmpInst::Predicate Predicate,
Value *LHS, Value *RHS) {
Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
- uint32_t& e = expressionNumbering[exp];
- if (!e) e = nextValueNumber++;
- return e;
+ return assignExpNewValueNum(exp).first;
}
/// Remove all entries from the ValueTable.
void GVN::ValueTable::clear() {
valueNumbering.clear();
expressionNumbering.clear();
+ NumberingPhi.clear();
+ PhiTranslateTable.clear();
nextValueNumber = 1;
+ Expressions.clear();
+ ExprIdx.clear();
+ nextExprNumber = 0;
}
/// Remove a value from the value numbering.
void GVN::ValueTable::erase(Value *V) {
+ uint32_t Num = valueNumbering.lookup(V);
valueNumbering.erase(V);
+ // If V is a PHINode, V <--> value number is a one-to-one mapping.
+ if (isa<PHINode>(V))
+ NumberingPhi.erase(Num);
}
/// verifyRemoved - Verify that the value is removed from all internal data
@@ -602,7 +619,7 @@ PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) {
+LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) const {
errs() << "{\n";
for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
E = d.end(); I != E; ++I) {
@@ -1451,6 +1468,95 @@ bool GVN::processLoad(LoadInst *L) {
return false;
}
+/// Return a pair whose first field is the value number of \p Exp and whose
+/// second field indicates whether the value number is newly created.
+std::pair<uint32_t, bool>
+GVN::ValueTable::assignExpNewValueNum(Expression &Exp) {
+ uint32_t &e = expressionNumbering[Exp];
+ bool CreateNewValNum = !e;
+ if (CreateNewValNum) {
+ Expressions.push_back(Exp);
+ if (ExprIdx.size() < nextValueNumber + 1)
+ ExprIdx.resize(nextValueNumber * 2);
+ e = nextValueNumber;
+ ExprIdx[nextValueNumber++] = nextExprNumber++;
+ }
+ return {e, CreateNewValNum};
+}
+
+/// Return whether all the values related with the same \p Num are
+/// defined in \p BB.
+bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
+ GVN &Gvn) {
+ LeaderTableEntry *Vals = &Gvn.LeaderTable[Num];
+ while (Vals && Vals->BB == BB)
+ Vals = Vals->Next;
+ return !Vals;
+}
+
+/// Wrap phiTranslateImpl to provide caching functionality.
+uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred,
+ const BasicBlock *PhiBlock, uint32_t Num,
+ GVN &Gvn) {
+ auto FindRes = PhiTranslateTable.find({Num, Pred});
+ if (FindRes != PhiTranslateTable.end())
+ return FindRes->second;
+ uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn);
+ PhiTranslateTable.insert({{Num, Pred}, NewNum});
+ return NewNum;
+}
+
+/// Translate value number \p Num using phis, so that it has the values of
+/// the phis in BB.
+uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
+ const BasicBlock *PhiBlock,
+ uint32_t Num, GVN &Gvn) {
+ if (PHINode *PN = NumberingPhi[Num]) {
+ for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
+ if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred)
+ if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false))
+ return TransVal;
+ }
+ return Num;
+ }
+
+ // If any value related with Num is defined in a BB other than
+ // PhiBlock, it cannot depend on a phi in PhiBlock without going through
+ // a backedge. We can do an early exit in that case to save compile time.
+ if (!areAllValsInBB(Num, PhiBlock, Gvn))
+ return Num;
+
+ if (Num >= ExprIdx.size() || ExprIdx[Num] == 0)
+ return Num;
+ Expression Exp = Expressions[ExprIdx[Num]];
+
+ for (unsigned i = 0; i < Exp.varargs.size(); i++) {
+ // For InsertValue and ExtractValue, some varargs are index numbers
+ // instead of value numbers. Those index numbers should not be
+ // translated.
+ if ((i > 1 && Exp.opcode == Instruction::InsertValue) ||
+ (i > 0 && Exp.opcode == Instruction::ExtractValue))
+ continue;
+ Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn);
+ }
+
+ if (Exp.commutative) {
+ assert(Exp.varargs.size() == 2 && "Unsupported commutative expression!");
+ if (Exp.varargs[0] > Exp.varargs[1]) {
+ std::swap(Exp.varargs[0], Exp.varargs[1]);
+ uint32_t Opcode = Exp.opcode >> 8;
+ if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)
+ Exp.opcode = (Opcode << 8) |
+ CmpInst::getSwappedPredicate(
+ static_cast<CmpInst::Predicate>(Exp.opcode & 255));
+ }
+ }
+
+ if (uint32_t NewNum = expressionNumbering[Exp])
+ return NewNum;
+ return Num;
+}
+
// In order to find a leader for a given value number at a
// specific basic block, we first obtain the list of all Values for that number,
// and then scan the list to find one whose block dominates the block in
@@ -1495,6 +1601,15 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
return Pred != nullptr;
}
+
+void GVN::assignBlockRPONumber(Function &F) {
+ uint32_t NextBlockNumber = 1;
+ ReversePostOrderTraversal<Function *> RPOT(&F);
+ for (BasicBlock *BB : RPOT)
+ BlockRPONumber[BB] = NextBlockNumber++;
+}
+
+
// Tries to replace instruction with const, using information from
// ReplaceWithConstMap.
bool GVN::replaceOperandsWithConsts(Instruction *Instr) const {
@@ -1856,6 +1971,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
// Fabricate val-num for dead-code in order to suppress assertion in
// performPRE().
assignValNumForDeadCode();
+ assignBlockRPONumber(F);
bool PREChanged = true;
while (PREChanged) {
PREChanged = performPRE(F);
@@ -1927,7 +2043,7 @@ bool GVN::processBlock(BasicBlock *BB) {
// Instantiate an expression in a predecessor that lacked it.
bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
- unsigned int ValNo) {
+ BasicBlock *Curr, unsigned int ValNo) {
// Because we are going top-down through the block, all value numbers
// will be available in the predecessor by the time we need them. Any
// that weren't originally present will have been instantiated earlier
@@ -1945,7 +2061,9 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
success = false;
break;
}
- if (Value *V = findLeader(Pred, VN.lookup(Op))) {
+ uint32_t TValNo =
+ VN.phiTranslate(Pred, Curr, VN.lookup(Op), *this);
+ if (Value *V = findLeader(Pred, TValNo)) {
Instr->setOperand(i, V);
} else {
success = false;
@@ -1962,10 +2080,12 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
Instr->insertBefore(Pred->getTerminator());
Instr->setName(Instr->getName() + ".pre");
Instr->setDebugLoc(Instr->getDebugLoc());
- VN.add(Instr, ValNo);
+
+ unsigned Num = VN.lookupOrAdd(Instr);
+ VN.add(Instr, Num);
// Update the availability map to include the new instruction.
- addToLeaderTable(ValNo, Instr, Pred);
+ addToLeaderTable(Num, Instr, Pred);
return true;
}
@@ -2003,18 +2123,27 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
SmallVector<std::pair<Value *, BasicBlock *>, 8> predMap;
for (BasicBlock *P : predecessors(CurrentBlock)) {
- // We're not interested in PRE where the block is its
- // own predecessor, or in blocks with predecessors
- // that are not reachable.
- if (P == CurrentBlock) {
+ // We're not interested in PRE where the block has predecessors that
+ // are not reachable.
+ if (!DT->isReachableFromEntry(P)) {
NumWithout = 2;
break;
- } else if (!DT->isReachableFromEntry(P)) {
+ }
+ // It is not safe to do PRE when P->CurrentBlock is a loop backedge, and
+ // when CurInst has operand defined in CurrentBlock (so it may be defined
+ // by phi in the loop header).
+ if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock] &&
+ any_of(CurInst->operands(), [&](const Use &U) {
+ if (auto *Inst = dyn_cast<Instruction>(U.get()))
+ return Inst->getParent() == CurrentBlock;
+ return false;
+ })) {
NumWithout = 2;
break;
}
- Value *predV = findLeader(P, ValNo);
+ uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this);
+ Value *predV = findLeader(P, TValNo);
if (!predV) {
predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
PREPred = P;
@@ -2054,7 +2183,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
}
// We need to insert somewhere, so let's give it a shot
PREInstr = CurInst->clone();
- if (!performScalarPREInsertion(PREInstr, PREPred, ValNo)) {
+ if (!performScalarPREInsertion(PREInstr, PREPred, CurrentBlock, ValNo)) {
// If we failed insertion, make sure we remove the instruction.
DEBUG(verifyRemoved(PREInstr));
PREInstr->deleteValue();
@@ -2168,6 +2297,7 @@ bool GVN::iterateOnFunction(Function &F) {
void GVN::cleanupGlobalSets() {
VN.clear();
LeaderTable.clear();
+ BlockRPONumber.clear();
TableAllocator.Reset();
}
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index c120036464d0..05293eb0079f 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -25,6 +25,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
@@ -576,7 +577,12 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
// Handle compare with phi operand, where the PHI is defined in this block.
if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
assert(Preference == WantInteger && "Compares only produce integers");
- PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0));
+ Type *CmpType = Cmp->getType();
+ Value *CmpLHS = Cmp->getOperand(0);
+ Value *CmpRHS = Cmp->getOperand(1);
+ CmpInst::Predicate Pred = Cmp->getPredicate();
+
+ PHINode *PN = dyn_cast<PHINode>(CmpLHS);
if (PN && PN->getParent() == BB) {
const DataLayout &DL = PN->getModule()->getDataLayout();
// We can do this simplification if any comparisons fold to true or false.
@@ -584,15 +590,15 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *PredBB = PN->getIncomingBlock(i);
Value *LHS = PN->getIncomingValue(i);
- Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
+ Value *RHS = CmpRHS->DoPHITranslation(BB, PredBB);
- Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, {DL});
+ Value *Res = SimplifyCmpInst(Pred, LHS, RHS, {DL});
if (!Res) {
if (!isa<Constant>(RHS))
continue;
LazyValueInfo::Tristate
- ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS,
+ ResT = LVI->getPredicateOnEdge(Pred, LHS,
cast<Constant>(RHS), PredBB, BB,
CxtI ? CxtI : Cmp);
if (ResT == LazyValueInfo::Unknown)
@@ -609,27 +615,67 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
// If comparing a live-in value against a constant, see if we know the
// live-in value on any predecessors.
- if (isa<Constant>(Cmp->getOperand(1)) && !Cmp->getType()->isVectorTy()) {
- Constant *CmpConst = cast<Constant>(Cmp->getOperand(1));
+ if (isa<Constant>(CmpRHS) && !CmpType->isVectorTy()) {
+ Constant *CmpConst = cast<Constant>(CmpRHS);
- if (!isa<Instruction>(Cmp->getOperand(0)) ||
- cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) {
+ if (!isa<Instruction>(CmpLHS) ||
+ cast<Instruction>(CmpLHS)->getParent() != BB) {
for (BasicBlock *P : predecessors(BB)) {
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
LazyValueInfo::Tristate Res =
- LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
+ LVI->getPredicateOnEdge(Pred, CmpLHS,
CmpConst, P, BB, CxtI ? CxtI : Cmp);
if (Res == LazyValueInfo::Unknown)
continue;
- Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
+ Constant *ResC = ConstantInt::get(CmpType, Res);
Result.push_back(std::make_pair(ResC, P));
}
return !Result.empty();
}
+ // InstCombine can fold some forms of constant range checks into
+ // (icmp (add (x, C1)), C2). See if we have such a thing with
+ // x as a live-in.
+ {
+ using namespace PatternMatch;
+ Value *AddLHS;
+ ConstantInt *AddConst;
+ if (isa<ConstantInt>(CmpConst) &&
+ match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {
+ if (!isa<Instruction>(AddLHS) ||
+ cast<Instruction>(AddLHS)->getParent() != BB) {
+ for (BasicBlock *P : predecessors(BB)) {
+ // If the value is known by LazyValueInfo to be a ConstantRange in
+ // a predecessor, use that information to try to thread this
+ // block.
+ ConstantRange CR = LVI->getConstantRangeOnEdge(
+ AddLHS, P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS));
+ // Propagate the range through the addition.
+ CR = CR.add(AddConst->getValue());
+
+ // Get the range where the compare returns true.
+ ConstantRange CmpRange = ConstantRange::makeExactICmpRegion(
+ Pred, cast<ConstantInt>(CmpConst)->getValue());
+
+ Constant *ResC;
+ if (CmpRange.contains(CR))
+ ResC = ConstantInt::getTrue(CmpType);
+ else if (CmpRange.inverse().contains(CR))
+ ResC = ConstantInt::getFalse(CmpType);
+ else
+ continue;
+
+ Result.push_back(std::make_pair(ResC, P));
+ }
+
+ return !Result.empty();
+ }
+ }
+ }
+
// Try to find a constant value for the LHS of a comparison,
// and evaluate it statically if we can.
PredValueInfoTy LHSVals;
@@ -638,8 +684,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
for (const auto &LHSVal : LHSVals) {
Constant *V = LHSVal.first;
- Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
- V, CmpConst);
+ Constant *Folded = ConstantExpr::getCompare(Pred, V, CmpConst);
if (Constant *KC = getKnownConstant(Folded, WantInteger))
Result.push_back(std::make_pair(KC, LHSVal.second));
}
@@ -752,6 +797,37 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
LVI->eraseBlock(SinglePred);
MergeBasicBlockIntoOnlyPred(BB);
+ // Now that BB is merged into SinglePred (i.e. SinglePred Code followed by
+ // BB code within one basic block `BB`), we need to invalidate the LVI
+ // information associated with BB, because the LVI information need not be
+ // true for all of BB after the merge. For example,
+ // Before the merge, LVI info and code is as follows:
+ // SinglePred: <LVI info1 for %p val>
+ // %y = use of %p
+ // call @exit() // need not transfer execution to successor.
+ // assume(%p) // from this point on %p is true
+ // br label %BB
+ // BB: <LVI info2 for %p val, i.e. %p is true>
+ // %x = use of %p
+ // br label exit
+ //
+ // Note that this LVI info for blocks BB and SinglePred is correct for %p
+ // (info2 and info1 respectively). After the merge and the deletion of the
+ // LVI info1 for SinglePred. We have the following code:
+ // BB: <LVI info2 for %p val>
+ // %y = use of %p
+ // call @exit()
+ // assume(%p)
+ // %x = use of %p <-- LVI info2 is correct from here onwards.
+ // br label exit
+ // LVI info2 for BB is incorrect at the beginning of BB.
+
+ // Invalidate LVI information for BB if the LVI is not provably true for
+ // all of BB.
+ if (any_of(*BB, [](Instruction &I) {
+ return !isGuaranteedToTransferExecutionToSuccessor(&I);
+ }))
+ LVI->eraseBlock(BB);
return true;
}
}
diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp
deleted file mode 100644
index 025ba1bfedc1..000000000000
--- a/lib/Transforms/Scalar/LoadCombine.cpp
+++ /dev/null
@@ -1,295 +0,0 @@
-//===- LoadCombine.cpp - Combine Adjacent Loads ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This transformation combines adjacent loads.
-///
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/TargetFolder.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "load-combine"
-
-STATISTIC(NumLoadsAnalyzed, "Number of loads analyzed for combining");
-STATISTIC(NumLoadsCombined, "Number of loads combined");
-
-#define LDCOMBINE_NAME "Combine Adjacent Loads"
-
-namespace {
-struct PointerOffsetPair {
- Value *Pointer;
- APInt Offset;
-};
-
-struct LoadPOPPair {
- LoadInst *Load;
- PointerOffsetPair POP;
- /// \brief The new load needs to be created before the first load in IR order.
- unsigned InsertOrder;
-};
-
-class LoadCombine : public BasicBlockPass {
- LLVMContext *C;
- AliasAnalysis *AA;
- DominatorTree *DT;
-
-public:
- LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) {
- initializeLoadCombinePass(*PassRegistry::getPassRegistry());
- }
-
- using llvm::Pass::doInitialization;
- bool doInitialization(Function &) override;
- bool runOnBasicBlock(BasicBlock &BB) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
-
- StringRef getPassName() const override { return LDCOMBINE_NAME; }
- static char ID;
-
- typedef IRBuilder<TargetFolder> BuilderTy;
-
-private:
- BuilderTy *Builder;
-
- PointerOffsetPair getPointerOffsetPair(LoadInst &);
- bool combineLoads(DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &);
- bool aggregateLoads(SmallVectorImpl<LoadPOPPair> &);
- bool combineLoads(SmallVectorImpl<LoadPOPPair> &);
-};
-}
-
-bool LoadCombine::doInitialization(Function &F) {
- DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n");
- C = &F.getContext();
- return true;
-}
-
-PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) {
- auto &DL = LI.getModule()->getDataLayout();
-
- PointerOffsetPair POP;
- POP.Pointer = LI.getPointerOperand();
- unsigned BitWidth = DL.getPointerSizeInBits(LI.getPointerAddressSpace());
- POP.Offset = APInt(BitWidth, 0);
-
- while (isa<BitCastInst>(POP.Pointer) || isa<GetElementPtrInst>(POP.Pointer)) {
- if (auto *GEP = dyn_cast<GetElementPtrInst>(POP.Pointer)) {
- APInt LastOffset = POP.Offset;
- if (!GEP->accumulateConstantOffset(DL, POP.Offset)) {
- // Can't handle GEPs with variable indices.
- POP.Offset = LastOffset;
- return POP;
- }
- POP.Pointer = GEP->getPointerOperand();
- } else if (auto *BC = dyn_cast<BitCastInst>(POP.Pointer)) {
- POP.Pointer = BC->getOperand(0);
- }
- }
- return POP;
-}
-
-bool LoadCombine::combineLoads(
- DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &LoadMap) {
- bool Combined = false;
- for (auto &Loads : LoadMap) {
- if (Loads.second.size() < 2)
- continue;
- std::sort(Loads.second.begin(), Loads.second.end(),
- [](const LoadPOPPair &A, const LoadPOPPair &B) {
- return A.POP.Offset.slt(B.POP.Offset);
- });
- if (aggregateLoads(Loads.second))
- Combined = true;
- }
- return Combined;
-}
-
-/// \brief Try to aggregate loads from a sorted list of loads to be combined.
-///
-/// It is guaranteed that no writes occur between any of the loads. All loads
-/// have the same base pointer. There are at least two loads.
-bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
- assert(Loads.size() >= 2 && "Insufficient loads!");
- LoadInst *BaseLoad = nullptr;
- SmallVector<LoadPOPPair, 8> AggregateLoads;
- bool Combined = false;
- bool ValidPrevOffset = false;
- APInt PrevOffset;
- uint64_t PrevSize = 0;
- for (auto &L : Loads) {
- if (ValidPrevOffset == false) {
- BaseLoad = L.Load;
- PrevOffset = L.POP.Offset;
- PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
- L.Load->getType());
- AggregateLoads.push_back(L);
- ValidPrevOffset = true;
- continue;
- }
- if (L.Load->getAlignment() > BaseLoad->getAlignment())
- continue;
- APInt PrevEnd = PrevOffset + PrevSize;
- if (L.POP.Offset.sgt(PrevEnd)) {
- // No other load will be combinable
- if (combineLoads(AggregateLoads))
- Combined = true;
- AggregateLoads.clear();
- ValidPrevOffset = false;
- continue;
- }
- if (L.POP.Offset != PrevEnd)
- // This load is offset less than the size of the last load.
- // FIXME: We may want to handle this case.
- continue;
- PrevOffset = L.POP.Offset;
- PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
- L.Load->getType());
- AggregateLoads.push_back(L);
- }
- if (combineLoads(AggregateLoads))
- Combined = true;
- return Combined;
-}
-
-/// \brief Given a list of combinable load. Combine the maximum number of them.
-bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
- // Remove loads from the end while the size is not a power of 2.
- unsigned TotalSize = 0;
- for (const auto &L : Loads)
- TotalSize += L.Load->getType()->getPrimitiveSizeInBits();
- while (TotalSize != 0 && !isPowerOf2_32(TotalSize))
- TotalSize -= Loads.pop_back_val().Load->getType()->getPrimitiveSizeInBits();
- if (Loads.size() < 2)
- return false;
-
- DEBUG({
- dbgs() << "***** Combining Loads ******\n";
- for (const auto &L : Loads) {
- dbgs() << L.POP.Offset << ": " << *L.Load << "\n";
- }
- });
-
- // Find first load. This is where we put the new load.
- LoadPOPPair FirstLP;
- FirstLP.InsertOrder = -1u;
- for (const auto &L : Loads)
- if (L.InsertOrder < FirstLP.InsertOrder)
- FirstLP = L;
-
- unsigned AddressSpace =
- FirstLP.POP.Pointer->getType()->getPointerAddressSpace();
-
- Builder->SetInsertPoint(FirstLP.Load);
- Value *Ptr = Builder->CreateConstGEP1_64(
- Builder->CreatePointerCast(Loads[0].POP.Pointer,
- Builder->getInt8PtrTy(AddressSpace)),
- Loads[0].POP.Offset.getSExtValue());
- LoadInst *NewLoad = new LoadInst(
- Builder->CreatePointerCast(
- Ptr, PointerType::get(IntegerType::get(Ptr->getContext(), TotalSize),
- Ptr->getType()->getPointerAddressSpace())),
- Twine(Loads[0].Load->getName()) + ".combined", false,
- Loads[0].Load->getAlignment(), FirstLP.Load);
-
- for (const auto &L : Loads) {
- Builder->SetInsertPoint(L.Load);
- Value *V = Builder->CreateExtractInteger(
- L.Load->getModule()->getDataLayout(), NewLoad,
- cast<IntegerType>(L.Load->getType()),
- (L.POP.Offset - Loads[0].POP.Offset).getZExtValue(), "combine.extract");
- L.Load->replaceAllUsesWith(V);
- }
-
- NumLoadsCombined += Loads.size();
- return true;
-}
-
-bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
- if (skipBasicBlock(BB))
- return false;
-
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
- // Skip analysing dead blocks (not forward reachable from function entry).
- if (!DT->isReachableFromEntry(&BB)) {
- DEBUG(dbgs() << "LC: skipping unreachable " << BB.getName() <<
- " in " << BB.getParent()->getName() << "\n");
- return false;
- }
-
- IRBuilder<TargetFolder> TheBuilder(
- BB.getContext(), TargetFolder(BB.getModule()->getDataLayout()));
- Builder = &TheBuilder;
-
- DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap;
- AliasSetTracker AST(*AA);
-
- bool Combined = false;
- unsigned Index = 0;
- for (auto &I : BB) {
- if (I.mayThrow() || AST.containsUnknown(&I)) {
- if (combineLoads(LoadMap))
- Combined = true;
- LoadMap.clear();
- AST.clear();
- continue;
- }
- if (I.mayWriteToMemory()) {
- AST.add(&I);
- continue;
- }
- LoadInst *LI = dyn_cast<LoadInst>(&I);
- if (!LI)
- continue;
- ++NumLoadsAnalyzed;
- if (!LI->isSimple() || !LI->getType()->isIntegerTy())
- continue;
- auto POP = getPointerOffsetPair(*LI);
- if (!POP.Pointer)
- continue;
- LoadMap[POP.Pointer].push_back({LI, std::move(POP), Index++});
- AST.add(LI);
- }
- if (combineLoads(LoadMap))
- Combined = true;
- return Combined;
-}
-
-char LoadCombine::ID = 0;
-
-BasicBlockPass *llvm::createLoadCombinePass() {
- return new LoadCombine();
-}
-
-INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false)
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 3151ccd279c4..c41cc42db5e2 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -31,20 +31,19 @@ using namespace llvm;
STATISTIC(NumDeleted, "Number of loops deleted");
/// This function deletes dead loops. The caller of this function needs to
-/// guarantee that the loop is infact dead. Here we handle two kinds of dead
+/// guarantee that the loop is in fact dead. Here we handle two kinds of dead
/// loop. The first kind (\p isLoopDead) is where only invariant values from
/// within the loop are used outside of it. The second kind (\p
/// isLoopNeverExecuted) is where the loop is provably never executed. We can
-/// always remove never executed loops since they will not cause any
-/// difference to program behaviour.
+/// always remove never executed loops since they will not cause any difference
+/// to program behaviour.
///
/// This also updates the relevant analysis information in \p DT, \p SE, and \p
/// LI. It also updates the loop PM if an updater struct is provided.
// TODO: This function will be used by loop-simplifyCFG as well. So, move this
// to LoopUtils.cpp
static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
- LoopInfo &LI, bool LoopIsNeverExecuted,
- LPMUpdater *Updater = nullptr);
+ LoopInfo &LI, LPMUpdater *Updater = nullptr);
/// Determines if a loop is dead.
///
/// This assumes that we've already checked for unique exit and exiting blocks,
@@ -168,7 +167,14 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
BasicBlock *ExitBlock = L->getUniqueExitBlock();
if (ExitBlock && isLoopNeverExecuted(L)) {
- deleteDeadLoop(L, DT, SE, LI, true /* LoopIsNeverExecuted */, Updater);
+ // Set incoming value to undef for phi nodes in the exit block.
+ BasicBlock::iterator BI = ExitBlock->begin();
+ while (PHINode *P = dyn_cast<PHINode>(BI)) {
+ for (unsigned i = 0; i < P->getNumIncomingValues(); i++)
+ P->setIncomingValue(i, UndefValue::get(P->getType()));
+ BI++;
+ }
+ deleteDeadLoop(L, DT, SE, LI, Updater);
++NumDeleted;
return true;
}
@@ -196,15 +202,14 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
if (isa<SCEVCouldNotCompute>(S))
return Changed;
- deleteDeadLoop(L, DT, SE, LI, false /* LoopIsNeverExecuted */, Updater);
+ deleteDeadLoop(L, DT, SE, LI, Updater);
++NumDeleted;
return true;
}
static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
- LoopInfo &LI, bool LoopIsNeverExecuted,
- LPMUpdater *Updater) {
+ LoopInfo &LI, LPMUpdater *Updater) {
assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
auto *Preheader = L->getLoopPreheader();
assert(Preheader && "Preheader should exist!");
@@ -227,6 +232,8 @@ static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
auto *ExitBlock = L->getUniqueExitBlock();
assert(ExitBlock && "Should have a unique exit block!");
+ assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
+
// Connect the preheader directly to the exit block.
// Even when the loop is never executed, we cannot remove the edge from the
// source block to the exit block. Consider the case where the unexecuted loop
@@ -236,20 +243,28 @@ static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
// non-loop, it will be deleted in a future iteration of loop deletion pass.
Preheader->getTerminator()->replaceUsesOfWith(L->getHeader(), ExitBlock);
- SmallVector<BasicBlock *, 4> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
// Rewrite phis in the exit block to get their inputs from the Preheader
// instead of the exiting block.
- BasicBlock *ExitingBlock = ExitingBlocks[0];
BasicBlock::iterator BI = ExitBlock->begin();
while (PHINode *P = dyn_cast<PHINode>(BI)) {
- int j = P->getBasicBlockIndex(ExitingBlock);
- assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
- if (LoopIsNeverExecuted)
- P->setIncomingValue(j, UndefValue::get(P->getType()));
- P->setIncomingBlock(j, Preheader);
- for (unsigned i = 1; i < ExitingBlocks.size(); ++i)
- P->removeIncomingValue(ExitingBlocks[i]);
+ // Set the zero'th element of Phi to be from the preheader and remove all
+ // other incoming values. Given the loop has dedicated exits, all other
+ // incoming values must be from the exiting blocks.
+ int PredIndex = 0;
+ P->setIncomingBlock(PredIndex, Preheader);
+ // Removes all incoming values from all other exiting blocks (including
+ // duplicate values from an exiting block).
+ // Nuke all entries except the zero'th entry which is the preheader entry.
+ // NOTE! We need to remove Incoming Values in the reverse order as done
+ // below, to keep the indices valid for deletion (removeIncomingValues
+ // updates getNumIncomingValues and shifts all values down into the operand
+ // being deleted).
+ for (unsigned i = 0, e = P->getNumIncomingValues() - 1; i != e; ++i)
+ P->removeIncomingValue(e-i, false);
+
+ assert((P->getNumIncomingValues() == 1 &&
+ P->getIncomingBlock(PredIndex) == Preheader) &&
+ "Should have exactly one value and that's from the preheader!");
++BI;
}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index b027278b24f2..73436f13c94e 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -131,7 +131,7 @@ static cl::opt<bool> EnablePhiElim(
// The flag adds instruction count to solutions cost comparision.
static cl::opt<bool> InsnsCost(
- "lsr-insns-cost", cl::Hidden, cl::init(true),
+ "lsr-insns-cost", cl::Hidden, cl::init(false),
cl::desc("Add instruction count to a LSR cost model"));
// Flag to choose how to narrow complex lsr solution
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index cbbd55512c9f..7a7624f77542 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -1244,27 +1244,24 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const {
// only do this for simple stores, we should expand to cover memcpys, etc.
const auto *LastStore = createStoreExpression(SI, StoreRHS);
const auto *LastCC = ExpressionToClass.lookup(LastStore);
- // Basically, check if the congruence class the store is in is defined by a
- // store that isn't us, and has the same value. MemorySSA takes care of
- // ensuring the store has the same memory state as us already.
- // The RepStoredValue gets nulled if all the stores disappear in a class, so
- // we don't need to check if the class contains a store besides us.
- if (LastCC &&
- LastCC->getStoredValue() == lookupOperandLeader(SI->getValueOperand()))
+ // We really want to check whether the expression we matched was a store. No
+ // easy way to do that. However, we can check that the class we found has a
+ // store, which, assuming the value numbering state is not corrupt, is
+ // sufficient, because we must also be equivalent to that store's expression
+ // for it to be in the same class as the load.
+ if (LastCC && LastCC->getStoredValue() == LastStore->getStoredValue())
return LastStore;
- deleteExpression(LastStore);
// Also check if our value operand is defined by a load of the same memory
// location, and the memory state is the same as it was then (otherwise, it
// could have been overwritten later. See test32 in
// transforms/DeadStoreElimination/simple.ll).
- if (auto *LI =
- dyn_cast<LoadInst>(lookupOperandLeader(SI->getValueOperand()))) {
+ if (auto *LI = dyn_cast<LoadInst>(LastStore->getStoredValue()))
if ((lookupOperandLeader(LI->getPointerOperand()) ==
- lookupOperandLeader(SI->getPointerOperand())) &&
+ LastStore->getOperand(0)) &&
(lookupMemoryLeader(getMemoryAccess(LI)->getDefiningAccess()) ==
StoreRHS))
- return createStoreExpression(SI, StoreRHS);
- }
+ return LastStore;
+ deleteExpression(LastStore);
}
// If the store is not equivalent to anything, value number it as a store that
@@ -2332,9 +2329,7 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) {
// see if we know some constant value for it already.
Value *NewGVN::findConditionEquivalence(Value *Cond) const {
auto Result = lookupOperandLeader(Cond);
- if (isa<Constant>(Result))
- return Result;
- return nullptr;
+ return isa<Constant>(Result) ? Result : nullptr;
}
// Process the outgoing edges of a block for reachability.
@@ -3014,14 +3009,27 @@ void NewGVN::verifyIterationSettled(Function &F) {
// a no-longer valid StoreExpression.
void NewGVN::verifyStoreExpressions() const {
#ifndef NDEBUG
- DenseSet<std::pair<const Value *, const Value *>> StoreExpressionSet;
+ // This is the only use of this, and it's not worth defining a complicated
+ // DenseMapInfo hash/equality function for it.
+ std::set<
+ std::pair<const Value *,
+ std::tuple<const Value *, const CongruenceClass *, Value *>>>
+ StoreExpressionSet;
for (const auto &KV : ExpressionToClass) {
if (auto *SE = dyn_cast<StoreExpression>(KV.first)) {
// Make sure a version that will conflict with loads is not already there
- auto Res =
- StoreExpressionSet.insert({SE->getOperand(0), SE->getMemoryLeader()});
- assert(Res.second &&
- "Stored expression conflict exists in expression table");
+ auto Res = StoreExpressionSet.insert(
+ {SE->getOperand(0), std::make_tuple(SE->getMemoryLeader(), KV.second,
+ SE->getStoredValue())});
+ bool Okay = Res.second;
+ // It's okay to have the same expression already in there if it is
+ // identical in nature.
+ // This can happen when the leader of the stored value changes over time.
+ if (!Okay)
+ Okay = (std::get<1>(Res.first->second) == KV.second) &&
+ (lookupOperandLeader(std::get<2>(Res.first->second)) ==
+ lookupOperandLeader(SE->getStoredValue()));
+ assert(Okay && "Stored expression conflict exists in expression table");
auto *ValueExpr = ValueToExpression.lookup(SE->getStoreInst());
assert(ValueExpr && ValueExpr->equals(*SE) &&
"StoreExpression in ExpressionToClass is not latest "
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index a20890b22603..6da551bd7efd 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -106,11 +107,12 @@ XorOpnd::XorOpnd(Value *V) {
I->getOpcode() == Instruction::And)) {
Value *V0 = I->getOperand(0);
Value *V1 = I->getOperand(1);
- if (isa<ConstantInt>(V0))
+ const APInt *C;
+ if (match(V0, PatternMatch::m_APInt(C)))
std::swap(V0, V1);
- if (ConstantInt *C = dyn_cast<ConstantInt>(V1)) {
- ConstPart = C->getValue();
+ if (match(V1, PatternMatch::m_APInt(C))) {
+ ConstPart = *C;
SymbolicPart = V0;
isOr = (I->getOpcode() == Instruction::Or);
return;
@@ -119,7 +121,7 @@ XorOpnd::XorOpnd(Value *V) {
// view the operand as "V | 0"
SymbolicPart = V;
- ConstPart = APInt::getNullValue(V->getType()->getIntegerBitWidth());
+ ConstPart = APInt::getNullValue(V->getType()->getScalarSizeInBits());
isOr = true;
}
@@ -955,8 +957,8 @@ static BinaryOperator *ConvertShiftToMul(Instruction *Shl) {
/// Scan backwards and forwards among values with the same rank as element i
/// to see if X exists. If X does not exist, return i. This is useful when
/// scanning for 'x' when we see '-x' because they both get the same rank.
-static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
- Value *X) {
+static unsigned FindInOperandList(const SmallVectorImpl<ValueEntry> &Ops,
+ unsigned i, Value *X) {
unsigned XRank = Ops[i].Rank;
unsigned e = Ops.size();
for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) {
@@ -1134,20 +1136,19 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
/// instruction. There are two special cases: 1) if the constant operand is 0,
/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
/// be returned.
-static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
+static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
const APInt &ConstOpnd) {
- if (ConstOpnd != 0) {
- if (!ConstOpnd.isAllOnesValue()) {
- LLVMContext &Ctx = Opnd->getType()->getContext();
- Instruction *I;
- I = BinaryOperator::CreateAnd(Opnd, ConstantInt::get(Ctx, ConstOpnd),
- "and.ra", InsertBefore);
- I->setDebugLoc(InsertBefore->getDebugLoc());
- return I;
- }
+ if (ConstOpnd.isNullValue())
+ return nullptr;
+
+ if (ConstOpnd.isAllOnesValue())
return Opnd;
- }
- return nullptr;
+
+ Instruction *I = BinaryOperator::CreateAnd(
+ Opnd, ConstantInt::get(Opnd->getType(), ConstOpnd), "and.ra",
+ InsertBefore);
+ I->setDebugLoc(InsertBefore->getDebugLoc());
+ return I;
}
// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
@@ -1163,24 +1164,24 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
// = ((x | c1) ^ c1) ^ (c1 ^ c2)
// = (x & ~c1) ^ (c1 ^ c2)
// It is useful only when c1 == c2.
- if (Opnd1->isOrExpr() && Opnd1->getConstPart() != 0) {
- if (!Opnd1->getValue()->hasOneUse())
- return false;
+ if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isNullValue())
+ return false;
- const APInt &C1 = Opnd1->getConstPart();
- if (C1 != ConstOpnd)
- return false;
+ if (!Opnd1->getValue()->hasOneUse())
+ return false;
- Value *X = Opnd1->getSymbolicPart();
- Res = createAndInstr(I, X, ~C1);
- // ConstOpnd was C2, now C1 ^ C2.
- ConstOpnd ^= C1;
+ const APInt &C1 = Opnd1->getConstPart();
+ if (C1 != ConstOpnd)
+ return false;
- if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
- RedoInsts.insert(T);
- return true;
- }
- return false;
+ Value *X = Opnd1->getSymbolicPart();
+ Res = createAndInstr(I, X, ~C1);
+ // ConstOpnd was C2, now C1 ^ C2.
+ ConstOpnd ^= C1;
+
+ if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
+ RedoInsts.insert(T);
+ return true;
}
@@ -1221,8 +1222,8 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
APInt C3((~C1) ^ C2);
// Do not increase code size!
- if (C3 != 0 && !C3.isAllOnesValue()) {
- int NewInstNum = ConstOpnd != 0 ? 1 : 2;
+ if (!C3.isNullValue() && !C3.isAllOnesValue()) {
+ int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
if (NewInstNum > DeadInstNum)
return false;
}
@@ -1238,8 +1239,8 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
APInt C3 = C1 ^ C2;
// Do not increase code size
- if (C3 != 0 && !C3.isAllOnesValue()) {
- int NewInstNum = ConstOpnd != 0 ? 1 : 2;
+ if (!C3.isNullValue() && !C3.isAllOnesValue()) {
+ int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
if (NewInstNum > DeadInstNum)
return false;
}
@@ -1279,17 +1280,20 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
SmallVector<XorOpnd, 8> Opnds;
SmallVector<XorOpnd*, 8> OpndPtrs;
Type *Ty = Ops[0].Op->getType();
- APInt ConstOpnd(Ty->getIntegerBitWidth(), 0);
+ APInt ConstOpnd(Ty->getScalarSizeInBits(), 0);
// Step 1: Convert ValueEntry to XorOpnd
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
Value *V = Ops[i].Op;
- if (!isa<ConstantInt>(V)) {
+ const APInt *C;
+ // TODO: Support non-splat vectors.
+ if (match(V, PatternMatch::m_APInt(C))) {
+ ConstOpnd ^= *C;
+ } else {
XorOpnd O(V);
O.setSymbolicRank(getRank(O.getSymbolicPart()));
Opnds.push_back(O);
- } else
- ConstOpnd ^= cast<ConstantInt>(V)->getValue();
+ }
}
// NOTE: From this point on, do *NOT* add/delete element to/from "Opnds".
@@ -1327,7 +1331,8 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
Value *CV;
// Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
- if (ConstOpnd != 0 && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
+ if (!ConstOpnd.isNullValue() &&
+ CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
Changed = true;
if (CV)
*CurrOpnd = XorOpnd(CV);
@@ -1369,17 +1374,17 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
ValueEntry VE(getRank(O.getValue()), O.getValue());
Ops.push_back(VE);
}
- if (ConstOpnd != 0) {
- Value *C = ConstantInt::get(Ty->getContext(), ConstOpnd);
+ if (!ConstOpnd.isNullValue()) {
+ Value *C = ConstantInt::get(Ty, ConstOpnd);
ValueEntry VE(getRank(C), C);
Ops.push_back(VE);
}
- int Sz = Ops.size();
+ unsigned Sz = Ops.size();
if (Sz == 1)
return Ops.back().Op;
- else if (Sz == 0) {
- assert(ConstOpnd == 0);
- return ConstantInt::get(Ty->getContext(), ConstOpnd);
+ if (Sz == 0) {
+ assert(ConstOpnd.isNullValue());
+ return ConstantInt::get(Ty, ConstOpnd);
}
}
@@ -1627,8 +1632,8 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
/// ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
///
/// \returns Whether any factors have a power greater than one.
-bool ReassociatePass::collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
- SmallVectorImpl<Factor> &Factors) {
+static bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
+ SmallVectorImpl<Factor> &Factors) {
// FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
// Compute the sum of powers of simplifiable factors.
unsigned FactorPowerSum = 0;
@@ -1999,11 +2004,6 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
if (I->isCommutative())
canonicalizeOperands(I);
- // TODO: We should optimize vector Xor instructions, but they are
- // currently unsupported.
- if (I->getType()->isVectorTy() && I->getOpcode() == Instruction::Xor)
- return;
-
// Don't optimize floating point instructions that don't have unsafe algebra.
if (I->getType()->isFPOrFPVectorTy() && !I->hasUnsafeAlgebra())
return;
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index c6929c33b3e9..7a6fa1711411 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -536,9 +536,10 @@ private:
void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
void visitFenceInst (FenceInst &I) { /*returns void*/ }
void visitInstruction(Instruction &I) {
- // If a new instruction is added to LLVM that we don't handle.
+ // All the instructions we don't do any special handling for just
+ // go to overdefined.
DEBUG(dbgs() << "SCCP: Don't know how to handle: " << I << '\n');
- markOverdefined(&I); // Just in case
+ markOverdefined(&I);
}
};
@@ -1814,15 +1815,11 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,
if (F.isDeclaration())
continue;
- if (Solver.isBlockExecutable(&F.front())) {
+ if (Solver.isBlockExecutable(&F.front()))
for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E;
- ++AI) {
- if (AI->use_empty())
- continue;
- if (tryToReplaceWithConstant(Solver, &*AI))
+ ++AI)
+ if (!AI->use_empty() && tryToReplaceWithConstant(Solver, &*AI))
++IPNumArgsElimed;
- }
- }
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
if (!Solver.isBlockExecutable(&*BB)) {
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 1527f15f18a3..80fbbeb6829b 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -1252,7 +1252,7 @@ static bool isSafeSelectToSpeculate(SelectInst &SI) {
if (!LI || !LI->isSimple())
return false;
- // Both operands to the select need to be dereferencable, either
+ // Both operands to the select need to be dereferenceable, either
// absolutely (e.g. allocas) or at this point because we can see other
// accesses to it.
if (!isSafeToLoadUnconditionally(TValue, LI->getAlignment(), DL, LI))
@@ -1637,8 +1637,17 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
return cast<PointerType>(NewTy)->getPointerAddressSpace() ==
cast<PointerType>(OldTy)->getPointerAddressSpace();
}
- if (NewTy->isIntegerTy() || OldTy->isIntegerTy())
- return true;
+
+ // We can convert integers to integral pointers, but not to non-integral
+ // pointers.
+ if (OldTy->isIntegerTy())
+ return !DL.isNonIntegralPointerType(NewTy);
+
+ // We can convert integral pointers to integers, but non-integral pointers
+ // need to remain pointers.
+ if (!DL.isNonIntegralPointerType(OldTy))
+ return NewTy->isIntegerTy();
+
return false;
}
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 850a01114eeb..ce6f93eb0c15 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -91,7 +91,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeSeparateConstOffsetFromGEPPass(Registry);
initializeSpeculativeExecutionLegacyPassPass(Registry);
initializeStraightLineStrengthReducePass(Registry);
- initializeLoadCombinePass(Registry);
initializePlaceBackedgeSafepointsImplPass(Registry);
initializePlaceSafepointsPass(Registry);
initializeFloat2IntLegacyPassPass(Registry);
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 3e5993618c4c..9397b87cdf56 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -321,7 +321,7 @@ static bool markTails(Function &F, bool &AllCallsAreTailCalls) {
/// instruction from after the call to before the call, assuming that all
/// instructions between the call and this instruction are movable.
///
-static bool canMoveAboveCall(Instruction *I, CallInst *CI) {
+static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) {
// FIXME: We can move load/store/call/free instructions above the call if the
// call does not mod/ref the memory location being processed.
if (I->mayHaveSideEffects()) // This also handles volatile loads.
@@ -332,10 +332,10 @@ static bool canMoveAboveCall(Instruction *I, CallInst *CI) {
if (CI->mayHaveSideEffects()) {
// Non-volatile loads may be moved above a call with side effects if it
// does not write to memory and the load provably won't trap.
- // FIXME: Writes to memory only matter if they may alias the pointer
+ // Writes to memory only matter if they may alias the pointer
// being loaded from.
const DataLayout &DL = L->getModule()->getDataLayout();
- if (CI->mayWriteToMemory() ||
+ if ((AA->getModRefInfo(CI, MemoryLocation::get(L)) & MRI_Mod) ||
!isSafeToLoadUnconditionally(L->getPointerOperand(),
L->getAlignment(), DL, L))
return false;
@@ -492,10 +492,11 @@ static CallInst *findTRECandidate(Instruction *TI,
return CI;
}
-static bool
-eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, BasicBlock *&OldEntry,
- bool &TailCallsAreMarkedTail,
- SmallVectorImpl<PHINode *> &ArgumentPHIs) {
+static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
+ BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVectorImpl<PHINode *> &ArgumentPHIs,
+ AliasAnalysis *AA) {
// If we are introducing accumulator recursion to eliminate operations after
// the call instruction that are both associative and commutative, the initial
// value for the accumulator is placed in this variable. If this value is set
@@ -515,7 +516,8 @@ eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, BasicBlock *&OldEntry,
// Check that this is the case now.
BasicBlock::iterator BBI(CI);
for (++BBI; &*BBI != Ret; ++BBI) {
- if (canMoveAboveCall(&*BBI, CI)) continue;
+ if (canMoveAboveCall(&*BBI, CI, AA))
+ continue;
// If we can't move the instruction above the call, it might be because it
// is an associative and commutative operation that could be transformed
@@ -674,12 +676,17 @@ static bool foldReturnAndProcessPred(BasicBlock *BB, ReturnInst *Ret,
bool &TailCallsAreMarkedTail,
SmallVectorImpl<PHINode *> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail,
- const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI,
+ AliasAnalysis *AA) {
bool Change = false;
+ // Make sure this block is a trivial return block.
+ assert(BB->getFirstNonPHIOrDbg() == Ret &&
+ "Trying to fold non-trivial return block");
+
// If the return block contains nothing but the return and PHI's,
// there might be an opportunity to duplicate the return in its
- // predecessors and perform TRC there. Look for predecessors that end
+ // predecessors and perform TRE there. Look for predecessors that end
// in unconditional branch and recursive call(s).
SmallVector<BranchInst*, 8> UncondBranchPreds;
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
@@ -706,7 +713,7 @@ static bool foldReturnAndProcessPred(BasicBlock *BB, ReturnInst *Ret,
BB->eraseFromParent();
eliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail,
- ArgumentPHIs);
+ ArgumentPHIs, AA);
++NumRetDuped;
Change = true;
}
@@ -719,16 +726,18 @@ static bool processReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail,
SmallVectorImpl<PHINode *> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail,
- const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI,
+ AliasAnalysis *AA) {
CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail, TTI);
if (!CI)
return false;
return eliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail,
- ArgumentPHIs);
+ ArgumentPHIs, AA);
}
-static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI) {
+static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI,
+ AliasAnalysis *AA) {
if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
return false;
@@ -763,11 +772,11 @@ static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI)
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
bool Change =
processReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
- ArgumentPHIs, !CanTRETailMarkedCall, TTI);
+ ArgumentPHIs, !CanTRETailMarkedCall, TTI, AA);
if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
- Change =
- foldReturnAndProcessPred(BB, Ret, OldEntry, TailCallsAreMarkedTail,
- ArgumentPHIs, !CanTRETailMarkedCall, TTI);
+ Change = foldReturnAndProcessPred(BB, Ret, OldEntry,
+ TailCallsAreMarkedTail, ArgumentPHIs,
+ !CanTRETailMarkedCall, TTI, AA);
MadeChange |= Change;
}
}
@@ -797,6 +806,7 @@ struct TailCallElim : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
}
@@ -805,7 +815,8 @@ struct TailCallElim : public FunctionPass {
return false;
return eliminateTailRecursion(
- F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F));
+ F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
+ &getAnalysis<AAResultsWrapperPass>().getAAResults());
}
};
}
@@ -826,8 +837,9 @@ PreservedAnalyses TailCallElimPass::run(Function &F,
FunctionAnalysisManager &AM) {
TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
+ AliasAnalysis &AA = AM.getResult<AAManager>(F);
- bool Changed = eliminateTailRecursion(F, &TTI);
+ bool Changed = eliminateTailRecursion(F, &TTI, &AA);
if (!Changed)
return PreservedAnalyses::all();
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index ebde1f9a17dd..b60dfb4f3541 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -116,6 +116,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
case LibFunc_wcslen:
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
+ Changed |= setOnlyAccessesArgMemory(F);
Changed |= setDoesNotCapture(F, 0);
return Changed;
case LibFunc_strchr:
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 2af671636cbd..5127eba3f9ae 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
@@ -1081,7 +1082,7 @@ static bool LdStHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr,
}
/// See if there is a dbg.value intrinsic for DIVar for the PHI node.
-static bool PhiHasDebugValue(DILocalVariable *DIVar,
+static bool PhiHasDebugValue(DILocalVariable *DIVar,
DIExpression *DIExpr,
PHINode *APN) {
// Since we can't guarantee that the original dbg.declare instrinsic
@@ -1159,7 +1160,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
DbgValue->insertAfter(LI);
}
-/// Inserts a llvm.dbg.value intrinsic after a phi
+/// Inserts a llvm.dbg.value intrinsic after a phi
/// that has an associated llvm.dbg.decl intrinsic.
void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
PHINode *APN, DIBuilder &Builder) {
@@ -1742,12 +1743,12 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
// Preserve !invariant.group in K.
break;
case LLVMContext::MD_align:
- K->setMetadata(Kind,
+ K->setMetadata(Kind,
MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
break;
case LLVMContext::MD_dereferenceable:
case LLVMContext::MD_dereferenceable_or_null:
- K->setMetadata(Kind,
+ K->setMetadata(Kind,
MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
break;
}
@@ -1847,6 +1848,49 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS) {
return false;
}
+void llvm::copyNonnullMetadata(const LoadInst &OldLI, MDNode *N,
+ LoadInst &NewLI) {
+ auto *NewTy = NewLI.getType();
+
+ // This only directly applies if the new type is also a pointer.
+ if (NewTy->isPointerTy()) {
+ NewLI.setMetadata(LLVMContext::MD_nonnull, N);
+ return;
+ }
+
+ // The only other translation we can do is to integral loads with !range
+ // metadata.
+ if (!NewTy->isIntegerTy())
+ return;
+
+ MDBuilder MDB(NewLI.getContext());
+ const Value *Ptr = OldLI.getPointerOperand();
+ auto *ITy = cast<IntegerType>(NewTy);
+ auto *NullInt = ConstantExpr::getPtrToInt(
+ ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
+ auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
+ NewLI.setMetadata(LLVMContext::MD_range,
+ MDB.createRange(NonNullInt, NullInt));
+}
+
+void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
+ MDNode *N, LoadInst &NewLI) {
+ auto *NewTy = NewLI.getType();
+
+ // Give up unless it is converted to a pointer where there is a single very
+ // valuable mapping we can do reliably.
+ // FIXME: It would be nice to propagate this in more ways, but the type
+ // conversions make it hard.
+ if (!NewTy->isPointerTy())
+ return;
+
+ unsigned BitWidth = DL.getTypeSizeInBits(NewTy);
+ if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
+ MDNode *NN = MDNode::get(OldLI.getContext(), None);
+ NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
+ }
+}
+
namespace {
/// A potential constituent of a bitreverse or bswap expression. See
/// collectBitParts for a fuller explanation.
@@ -1968,7 +2012,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
unsigned NumMaskedBits = AndMask.countPopulation();
if (!MatchBitReversals && NumMaskedBits % 8 != 0)
return Result;
-
+
auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
MatchBitReversals, BPS);
if (!Res)
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index f3db278ef1e4..e21e34df8ded 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -72,7 +72,6 @@ using namespace llvm;
#define DEBUG_TYPE "loop-simplify"
-STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
STATISTIC(NumNested , "Number of nested loops split out");
// If the block isn't already, move the new block to right after some 'outside
@@ -152,37 +151,6 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
return PreheaderBB;
}
-/// \brief Ensure that the loop preheader dominates all exit blocks.
-///
-/// This method is used to split exit blocks that have predecessors outside of
-/// the loop.
-static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit,
- DominatorTree *DT, LoopInfo *LI,
- bool PreserveLCSSA) {
- SmallVector<BasicBlock*, 8> LoopBlocks;
- for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
- BasicBlock *P = *I;
- if (L->contains(P)) {
- // Don't do this if the loop is exited via an indirect branch.
- if (isa<IndirectBrInst>(P->getTerminator())) return nullptr;
-
- LoopBlocks.push_back(P);
- }
- }
-
- assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
- BasicBlock *NewExitBB = nullptr;
-
- NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", DT, LI,
- PreserveLCSSA);
- if (!NewExitBB)
- return nullptr;
-
- DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
- << NewExitBB->getName() << "\n");
- return NewExitBB;
-}
-
/// Add the specified block, and all of its predecessors, to the specified set,
/// if it's not already in there. Stop predecessor traversal when we reach
/// StopBlock.
@@ -346,16 +314,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
// Split edges to exit blocks from the inner loop, if they emerged in the
// process of separating the outer one.
- SmallVector<BasicBlock *, 8> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
- SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
- ExitBlocks.end());
- for (BasicBlock *ExitBlock : ExitBlockSet) {
- if (any_of(predecessors(ExitBlock),
- [L](BasicBlock *BB) { return !L->contains(BB); })) {
- rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA);
- }
- }
+ formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA);
if (PreserveLCSSA) {
// Fix LCSSA form for L. Some values, which previously were only used inside
@@ -563,29 +522,16 @@ ReprocessLoop:
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
- if (Preheader) {
- ++NumInserted;
+ if (Preheader)
Changed = true;
- }
}
// Next, check to make sure that all exit nodes of the loop only have
// predecessors that are inside of the loop. This check guarantees that the
// loop preheader/header will dominate the exit blocks. If the exit block has
// predecessors from outside of the loop, split the edge now.
- SmallVector<BasicBlock*, 8> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
-
- SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
- ExitBlocks.end());
- for (BasicBlock *ExitBlock : ExitBlockSet) {
- if (any_of(predecessors(ExitBlock),
- [L](BasicBlock *BB) { return !L->contains(BB); })) {
- rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA);
- ++NumInserted;
- Changed = true;
- }
- }
+ if (formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA))
+ Changed = true;
// If the header has more than two predecessors at this point (from the
// preheader and from multiple backedges), we must adjust the loop.
@@ -614,10 +560,8 @@ ReprocessLoop:
// insert a new block that all backedges target, then make it jump to the
// loop header.
LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI);
- if (LoopLatch) {
- ++NumInserted;
+ if (LoopLatch)
Changed = true;
- }
}
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
@@ -645,7 +589,22 @@ ReprocessLoop:
// loop-invariant instructions out of the way to open up more
// opportunities, and the disadvantage of having the responsibility
// to preserve dominator information.
- if (ExitBlockSet.size() == 1) {
+ auto HasUniqueExitBlock = [&]() {
+ BasicBlock *UniqueExit = nullptr;
+ for (auto *ExitingBB : ExitingBlocks)
+ for (auto *SuccBB : successors(ExitingBB)) {
+ if (L->contains(SuccBB))
+ continue;
+
+ if (!UniqueExit)
+ UniqueExit = SuccBB;
+ else if (UniqueExit != SuccBB)
+ return false;
+ }
+
+ return true;
+ };
+ if (HasUniqueExitBlock()) {
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
BasicBlock *ExitingBlock = ExitingBlocks[i];
if (!ExitingBlock->getSinglePredecessor()) continue;
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index a920cd86a26a..5f85e17927fa 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -472,10 +472,22 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// exit block only.
if (!L->isLoopSimplifyForm())
return false;
- BasicBlock *Exit = L->getUniqueExitBlock(); // successor out of loop
- if (!Exit)
- return false;
+ // Guaranteed by LoopSimplifyForm.
+ BasicBlock *Latch = L->getLoopLatch();
+
+ BasicBlock *LatchExit = L->getUniqueExitBlock(); // successor out of loop
+ if (!LatchExit)
+ return false;
+ // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
+ // targets of the Latch be the single exit block out of the loop. This needs
+ // to be guaranteed by the callers of UnrollRuntimeLoopRemainder.
+ BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+ assert((LatchBR->getSuccessor(0) == LatchExit ||
+ LatchBR->getSuccessor(1) == LatchExit) &&
+ "one of the loop latch successors should be "
+ "the exit block!");
+ (void)LatchBR;
// Use Scalar Evolution to compute the trip count. This allows more loops to
// be unrolled than relying on induction var simplification.
if (!SE)
@@ -510,25 +522,13 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
if (Log2_32(Count) > BEWidth)
return false;
- BasicBlock *Latch = L->getLoopLatch();
-
- // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
- // targets of the Latch be the single exit block out of the loop. This needs
- // to be guaranteed by the callers of UnrollRuntimeLoopRemainder.
- BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
- assert(
- (LatchBR->getSuccessor(0) == Exit || LatchBR->getSuccessor(1) == Exit) &&
- "one of the loop latch successors should be "
- "the exit block!");
- // Avoid warning of unused `LatchBR` variable in release builds.
- (void)LatchBR;
// Loop structure is the following:
//
// PreHeader
// Header
// ...
// Latch
- // Exit
+ // LatchExit
BasicBlock *NewPreHeader;
BasicBlock *NewExit = nullptr;
@@ -541,9 +541,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// Split PreHeader to insert a branch around loop for unrolling.
NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
NewPreHeader->setName(PreHeader->getName() + ".new");
- // Split Exit to create phi nodes from branch above.
- SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
- NewExit = SplitBlockPredecessors(Exit, Preds, ".unr-lcssa",
+ // Split LatchExit to create phi nodes from branch above.
+ SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
+ NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa",
DT, LI, PreserveLCSSA);
// Split NewExit to insert epilog remainder loop.
EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
@@ -570,7 +570,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// Latch Header
// *NewExit ...
// *EpilogPreHeader Latch
- // Exit Exit
+ // LatchExit LatchExit
// Calculate conditions for branch around loop for unrolling
// in epilog case and around prolog remainder loop in prolog case.
@@ -648,7 +648,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// Clone all the basic blocks in the loop. If Count is 2, we don't clone
// the loop, otherwise we create a cloned loop to execute the extra
// iterations. This function adds the appropriate CFG connections.
- BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit;
+ BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,
InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
@@ -672,7 +672,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// EpilogHeader Header
// ... ...
// EpilogLatch Latch
- // Exit Exit
+ // LatchExit LatchExit
// Rewrite the cloned instruction operands to use the values created when the
// clone is created.
@@ -686,7 +686,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
if (UseEpilogRemainder) {
// Connect the epilog code to the original loop and update the
// PHI functions.
- ConnectEpilog(L, ModVal, NewExit, Exit, PreHeader,
+ ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader,
EpilogPreHeader, NewPreHeader, VMap, DT, LI,
PreserveLCSSA);
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index 412f6129407e..0ed33945ef40 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
@@ -29,6 +30,7 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -87,8 +89,7 @@ RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT,
// Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
// with a new integer type of the corresponding bit width.
- if (match(J, m_CombineOr(m_And(m_Instruction(I), m_APInt(M)),
- m_And(m_APInt(M), m_Instruction(I))))) {
+ if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) {
int32_t Bits = (*M + 1).exactLogBase2();
if (Bits > 0) {
RT = IntegerType::get(Phi->getContext(), Bits);
@@ -923,6 +924,69 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
return true;
}
+bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
+ bool PreserveLCSSA) {
+ bool Changed = false;
+
+  // We re-use a vector for the in-loop predecessors.
+ SmallVector<BasicBlock *, 4> InLoopPredecessors;
+
+ auto RewriteExit = [&](BasicBlock *BB) {
+ assert(InLoopPredecessors.empty() &&
+ "Must start with an empty predecessors list!");
+ auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });
+
+ // See if there are any non-loop predecessors of this exit block and
+ // keep track of the in-loop predecessors.
+ bool IsDedicatedExit = true;
+ for (auto *PredBB : predecessors(BB))
+ if (L->contains(PredBB)) {
+ if (isa<IndirectBrInst>(PredBB->getTerminator()))
+ // We cannot rewrite exiting edges from an indirectbr.
+ return false;
+
+ InLoopPredecessors.push_back(PredBB);
+ } else {
+ IsDedicatedExit = false;
+ }
+
+ assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");
+
+ // Nothing to do if this is already a dedicated exit.
+ if (IsDedicatedExit)
+ return false;
+
+ auto *NewExitBB = SplitBlockPredecessors(
+ BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA);
+
+ if (!NewExitBB)
+ DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
+ << *L << "\n");
+ else
+ DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
+ << NewExitBB->getName() << "\n");
+ return true;
+ };
+
+ // Walk the exit blocks directly rather than building up a data structure for
+ // them, but only visit each one once.
+ SmallPtrSet<BasicBlock *, 4> Visited;
+ for (auto *BB : L->blocks())
+ for (auto *SuccBB : successors(BB)) {
+ // We're looking for exit blocks so skip in-loop successors.
+ if (L->contains(SuccBB))
+ continue;
+
+ // Visit each exit block exactly once.
+ if (!Visited.insert(SuccBB).second)
+ continue;
+
+ Changed |= RewriteExit(SuccBB);
+ }
+
+ return Changed;
+}
+
/// \brief Returns the instructions that use values defined in the loop.
SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
SmallVector<Instruction *, 8> UsedOutside;
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1abdb2484850..eac2867233bc 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5702,14 +5702,14 @@ bool LoopVectorizationLegality::memoryInstructionCanBeWidened(Instruction *I,
void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
// We should not collect Uniforms more than once per VF. Right now,
- // this function is called from collectUniformsAndScalars(), which
+ // this function is called from collectUniformsAndScalars(), which
// already does this check. Collecting Uniforms for VF=1 does not make any
// sense.
assert(VF >= 2 && !Uniforms.count(VF) &&
"This function should not be visited twice for the same VF");
- // Visit the list of Uniforms. If we'll not find any uniform value, we'll
+ // Visit the list of Uniforms. If we'll not find any uniform value, we'll
// not analyze again. Uniforms.count(VF) will return 1.
Uniforms[VF].clear();
@@ -5988,10 +5988,10 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
continue;
Value *Ptr = getPointerOperand(&I);
- // We don't check wrapping here because we don't know yet if Ptr will be
- // part of a full group or a group with gaps. Checking wrapping for all
+ // We don't check wrapping here because we don't know yet if Ptr will be
+ // part of a full group or a group with gaps. Checking wrapping for all
// pointers (even those that end up in groups with no gaps) will be overly
- // conservative. For full groups, wrapping should be ok since if we would
+ // conservative. For full groups, wrapping should be ok since if we would
// wrap around the address space we would do a memory access at nullptr
// even without the transformation. The wrapping checks are therefore
// deferred until after we've formed the interleaved groups.
@@ -6244,7 +6244,7 @@ void InterleavedAccessInfo::analyzeInterleaving(
Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
if (LastMember) {
Value *LastMemberPtr = getPointerOperand(LastMember);
- if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
+ if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
/*ShouldCheckWrap=*/true)) {
DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
"last group member potentially pointer-wrapping.\n");
@@ -6252,9 +6252,9 @@ void InterleavedAccessInfo::analyzeInterleaving(
}
} else {
// Case 3: A non-reversed interleaved load group with gaps: We need
- // to execute at least one scalar epilogue iteration. This will ensure
+ // to execute at least one scalar epilogue iteration. This will ensure
// we don't speculatively access memory out-of-bounds. We only need
- // to look for a member at index factor - 1, since every group must have
+ // to look for a member at index factor - 1, since every group must have
// a member at index zero.
if (Group->isReverse()) {
releaseGroup(Group);
@@ -7789,8 +7789,18 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Check the loop for a trip count threshold:
// do not vectorize loops with a tiny trip count.
- const unsigned MaxTC = SE->getSmallConstantMaxTripCount(L);
- if (MaxTC > 0u && MaxTC < TinyTripCountVectorThreshold) {
+ unsigned ExpectedTC = SE->getSmallConstantMaxTripCount(L);
+ bool HasExpectedTC = (ExpectedTC > 0);
+
+ if (!HasExpectedTC && LoopVectorizeWithBlockFrequency) {
+ auto EstimatedTC = getLoopEstimatedTripCount(L);
+ if (EstimatedTC) {
+ ExpectedTC = *EstimatedTC;
+ HasExpectedTC = true;
+ }
+ }
+
+ if (HasExpectedTC && ExpectedTC < TinyTripCountVectorThreshold) {
DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
<< "This loop is not worth vectorizing.");
if (Hints.getForce() == LoopVectorizeHints::FK_Enabled)
@@ -7822,18 +7832,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
bool OptForSize =
Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();
- // Compute the weighted frequency of this loop being executed and see if it
- // is less than 20% of the function entry baseline frequency. Note that we
- // always have a canonical loop here because we think we *can* vectorize.
- // FIXME: This is hidden behind a flag due to pervasive problems with
- // exactly what block frequency models.
- if (LoopVectorizeWithBlockFrequency) {
- BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader());
- if (Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
- LoopEntryFreq < ColdEntryFreq)
- OptForSize = true;
- }
-
// Check the function attributes to see if implicit floats are allowed.
// FIXME: This check doesn't seem possibly correct -- what if the loop is
// an integer loop and the vector instructions selected are purely integer
@@ -8015,11 +8013,6 @@ bool LoopVectorizePass::runImpl(
DB = &DB_;
ORE = &ORE_;
- // Compute some weights outside of the loop over the loops. Compute this
- // using a BranchProbability to re-use its scaling math.
- const BranchProbability ColdProb(1, 5); // 20%
- ColdEntryFreq = BlockFrequency(BFI->getEntryFreq()) * ColdProb;
-
// Don't attempt if
// 1. the target claims to have no vector registers, and
// 2. interleaving won't help ILP.
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d1349535f298..b267230d3185 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3327,12 +3327,10 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
BundleMember->Dependencies++;
ScheduleData *DestBundle = UseSD->FirstInBundle;
- if (!DestBundle->IsScheduled) {
+ if (!DestBundle->IsScheduled)
BundleMember->incrementUnscheduledDeps(1);
- }
- if (!DestBundle->hasValidDependencies()) {
+ if (!DestBundle->hasValidDependencies())
WorkList.push_back(DestBundle);
- }
}
} else {
// I'm not sure if this can ever happen. But we need to be safe.