summaryrefslogtreecommitdiff
path: root/lib/Transforms/Vectorize
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Vectorize')
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp64
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp30
2 files changed, 35 insertions, 59 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index eb82ee283d44..012b10c8a9b0 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -574,11 +574,9 @@ protected:
/// Returns (and creates if needed) the trip count of the widened loop.
Value *getOrCreateVectorTripCount(Loop *NewLoop);
- /// Emit a bypass check to see if the trip count would overflow, or we
- /// wouldn't have enough iterations to execute one vector loop.
+ /// Emit a bypass check to see if the vector trip count is zero, including if
+ /// it overflows.
void emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass);
- /// Emit a bypass check to see if the vector trip count is nonzero.
- void emitVectorLoopEnteredCheck(Loop *L, BasicBlock *Bypass);
/// Emit a bypass check to see if all of the SCEV assumptions we've
/// had to make are correct.
void emitSCEVChecks(Loop *L, BasicBlock *Bypass);
@@ -3289,37 +3287,16 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
BasicBlock *BB = L->getLoopPreheader();
IRBuilder<> Builder(BB->getTerminator());
- // Generate code to check that the loop's trip count that we computed by
- // adding one to the backedge-taken count will not overflow.
- Value *CheckMinIters = Builder.CreateICmpULT(
- Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check");
+ // Generate code to check if the loop's trip count is less than VF * UF, or
+ // equal to it in case a scalar epilogue is required; this implies that the
+ // vector trip count is zero. This check also covers the case where adding one
+ // to the backedge-taken count overflowed leading to an incorrect trip count
+ // of zero. In this case we will also jump to the scalar loop.
+ auto P = Legal->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE
+ : ICmpInst::ICMP_ULT;
+ Value *CheckMinIters = Builder.CreateICmp(
+ P, Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check");
- BasicBlock *NewBB =
- BB->splitBasicBlock(BB->getTerminator(), "min.iters.checked");
- // Update dominator tree immediately if the generated block is a
- // LoopBypassBlock because SCEV expansions to generate loop bypass
- // checks may query it before the current function is finished.
- DT->addNewBlock(NewBB, BB);
- if (L->getParentLoop())
- L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
- ReplaceInstWithInst(BB->getTerminator(),
- BranchInst::Create(Bypass, NewBB, CheckMinIters));
- LoopBypassBlocks.push_back(BB);
-}
-
-void InnerLoopVectorizer::emitVectorLoopEnteredCheck(Loop *L,
- BasicBlock *Bypass) {
- Value *TC = getOrCreateVectorTripCount(L);
- BasicBlock *BB = L->getLoopPreheader();
- IRBuilder<> Builder(BB->getTerminator());
-
- // Now, compare the new count to zero. If it is zero skip the vector loop and
- // jump to the scalar loop.
- Value *Cmp = Builder.CreateICmpEQ(TC, Constant::getNullValue(TC->getType()),
- "cmp.zero");
-
- // Generate code to check that the loop's trip count that we computed by
- // adding one to the backedge-taken count will not overflow.
BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
// Update dominator tree immediately if the generated block is a
// LoopBypassBlock because SCEV expansions to generate loop bypass
@@ -3328,7 +3305,7 @@ void InnerLoopVectorizer::emitVectorLoopEnteredCheck(Loop *L,
if (L->getParentLoop())
L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
ReplaceInstWithInst(BB->getTerminator(),
- BranchInst::Create(Bypass, NewBB, Cmp));
+ BranchInst::Create(Bypass, NewBB, CheckMinIters));
LoopBypassBlocks.push_back(BB);
}
@@ -3477,14 +3454,13 @@ void InnerLoopVectorizer::createVectorizedLoopSkeleton() {
Value *StartIdx = ConstantInt::get(IdxTy, 0);
- // We need to test whether the backedge-taken count is uint##_max. Adding one
- // to it will cause overflow and an incorrect loop trip count in the vector
- // body. In case of overflow we want to directly jump to the scalar remainder
- // loop.
- emitMinimumIterationCountCheck(Lp, ScalarPH);
// Now, compare the new count to zero. If it is zero skip the vector loop and
- // jump to the scalar loop.
- emitVectorLoopEnteredCheck(Lp, ScalarPH);
+ // jump to the scalar loop. This check also covers the case where the
+ // backedge-taken count is uint##_max: adding one to it will overflow leading
+ // to an incorrect trip count of zero. In this (rare) case we will also jump
+ // to the scalar loop.
+ emitMinimumIterationCountCheck(Lp, ScalarPH);
+
// Generate the code to check any assumptions that we've made for SCEV
// expressions.
emitSCEVChecks(Lp, ScalarPH);
@@ -3527,7 +3503,7 @@ void InnerLoopVectorizer::createVectorizedLoopSkeleton() {
// We know what the end value is.
EndValue = CountRoundDown;
} else {
- IRBuilder<> B(LoopBypassBlocks.back()->getTerminator());
+ IRBuilder<> B(Lp->getLoopPreheader()->getTerminator());
Type *StepType = II.getStep()->getType();
Instruction::CastOps CastOp =
CastInst::getCastOpcode(CountRoundDown, true, StepType, true);
@@ -4168,7 +4144,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
// To do so, we need to generate the 'identity' vector and override
// one of the elements with the incoming scalar reduction. We need
// to do it in the vector-loop preheader.
- Builder.SetInsertPoint(LoopBypassBlocks[1]->getTerminator());
+ Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
// This is the vector-clone of the value that leaves the loop.
Type *VecTy = getOrCreateVectorValue(LoopExitInst, 0)->getType();
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4425043ad39a..dcbcab459a6b 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -434,7 +434,7 @@ private:
/// \returns the pointer to the vectorized value if \p VL is already
/// vectorized, or NULL. They may happen in cycles.
- Value *alreadyVectorized(ArrayRef<Value *> VL) const;
+ Value *alreadyVectorized(ArrayRef<Value *> VL, Value *OpValue) const;
/// \returns the scalarization cost for this type. Scalarization in this
/// context means the creation of vectors from a group of scalars.
@@ -857,7 +857,7 @@ private:
/// Checks if a bundle of instructions can be scheduled, i.e. has no
/// cyclic dependencies. This is only a dry-run, no instructions are
/// actually moved at this stage.
- bool tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP);
+ bool tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP, Value *OpValue);
/// Un-bundles a group of instructions.
void cancelScheduling(ArrayRef<Value *> VL, Value *OpValue);
@@ -1212,7 +1212,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Check that all of the users of the scalars that we want to vectorize are
// schedulable.
Instruction *VL0 = cast<Instruction>(VL[0]);
- BasicBlock *BB = cast<Instruction>(VL0)->getParent();
+ BasicBlock *BB = VL0->getParent();
if (!DT->isReachableFromEntry(BB)) {
// Don't go into unreachable blocks. They may contain instructions with
@@ -1237,7 +1237,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
BlockScheduling &BS = *BSRef.get();
- if (!BS.tryScheduleBundle(VL, this)) {
+ if (!BS.tryScheduleBundle(VL, this, VL0)) {
DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
assert((!BS.getScheduleData(VL[0]) ||
!BS.getScheduleData(VL[0])->isPartOfBundle()) &&
@@ -2427,8 +2427,8 @@ Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
return Vec;
}
-Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) const {
- if (const TreeEntry *En = getTreeEntry(VL[0])) {
+Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL, Value *OpValue) const {
+ if (const TreeEntry *En = getTreeEntry(OpValue)) {
if (En->isSame(VL) && En->VectorizedValue)
return En->VectorizedValue;
}
@@ -2553,7 +2553,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *InVec = vectorizeTree(INVL);
- if (Value *V = alreadyVectorized(E->Scalars))
+ if (Value *V = alreadyVectorized(E->Scalars, VL0))
return V;
CastInst *CI = dyn_cast<CastInst>(VL0);
@@ -2575,7 +2575,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *L = vectorizeTree(LHSV);
Value *R = vectorizeTree(RHSV);
- if (Value *V = alreadyVectorized(E->Scalars))
+ if (Value *V = alreadyVectorized(E->Scalars, VL0))
return V;
CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
@@ -2604,7 +2604,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *True = vectorizeTree(TrueVec);
Value *False = vectorizeTree(FalseVec);
- if (Value *V = alreadyVectorized(E->Scalars))
+ if (Value *V = alreadyVectorized(E->Scalars, VL0))
return V;
Value *V = Builder.CreateSelect(Cond, True, False);
@@ -2644,7 +2644,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *LHS = vectorizeTree(LHSVL);
Value *RHS = vectorizeTree(RHSVL);
- if (Value *V = alreadyVectorized(E->Scalars))
+ if (Value *V = alreadyVectorized(E->Scalars, VL0))
return V;
BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
@@ -2806,7 +2806,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *LHS = vectorizeTree(LHSVL);
Value *RHS = vectorizeTree(RHSVL);
- if (Value *V = alreadyVectorized(E->Scalars))
+ if (Value *V = alreadyVectorized(E->Scalars, VL0))
return V;
// Create a vector of LHS op1 RHS
@@ -3097,8 +3097,8 @@ void BoUpSLP::optimizeGatherSequence() {
// Groups the instructions to a bundle (which is then a single scheduling entity)
// and schedules instructions until the bundle gets ready.
bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
- BoUpSLP *SLP) {
- if (isa<PHINode>(VL[0]))
+ BoUpSLP *SLP, Value *OpValue) {
+ if (isa<PHINode>(OpValue))
return true;
// Initialize the instruction bundle.
@@ -3106,7 +3106,7 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
ScheduleData *PrevInBundle = nullptr;
ScheduleData *Bundle = nullptr;
bool ReSchedule = false;
- DEBUG(dbgs() << "SLP: bundle: " << *VL[0] << "\n");
+ DEBUG(dbgs() << "SLP: bundle: " << *OpValue << "\n");
// Make sure that the scheduling region contains all
// instructions of the bundle.
@@ -3177,7 +3177,7 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
}
}
if (!Bundle->isReady()) {
- cancelScheduling(VL, VL[0]);
+ cancelScheduling(VL, OpValue);
return false;
}
return true;