summary | refs | log | tree | commit | diff
path: root/lib/Transforms/Vectorize/SLPVectorizer.cpp
diff options
context:
space:
mode:
author Dimitry Andric <dim@FreeBSD.org> 2019-01-19 10:01:25 +0000
committer Dimitry Andric <dim@FreeBSD.org> 2019-01-19 10:01:25 +0000
commit d8e91e46262bc44006913e6796843909f1ac7bcd (patch)
tree 7d0c143d9b38190e0fa0180805389da22cd834c5 /lib/Transforms/Vectorize/SLPVectorizer.cpp
parent b7eb8e35e481a74962664b63dfb09483b200209a (diff)
Notes
Diffstat (limited to 'lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r-- lib/Transforms/Vectorize/SLPVectorizer.cpp 59
1 files changed, 31 insertions, 28 deletions
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5c2efe885e22..2e856a7e6802 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1536,12 +1536,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Check for terminator values (e.g. invoke).
for (unsigned j = 0; j < VL.size(); ++j)
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
- TerminatorInst *Term = dyn_cast<TerminatorInst>(
- cast<PHINode>(VL[j])->getIncomingValueForBlock(PH->getIncomingBlock(i)));
- if (Term) {
- LLVM_DEBUG(
- dbgs()
- << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n");
+ Instruction *Term = dyn_cast<Instruction>(
+ cast<PHINode>(VL[j])->getIncomingValueForBlock(
+ PH->getIncomingBlock(i)));
+ if (Term && Term->isTerminator()) {
+ LLVM_DEBUG(dbgs()
+ << "SLP: Need to swizzle PHINodes (terminator use).\n");
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
return;
@@ -2164,7 +2164,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
// extractelement/ext pair.
DeadCost -= TTI->getExtractWithExtendCost(
Ext->getOpcode(), Ext->getType(), VecTy, i);
- // Add back the cost of s|zext which is subtracted seperately.
+ // Add back the cost of s|zext which is subtracted separately.
DeadCost += TTI->getCastInstrCost(
Ext->getOpcode(), Ext->getType(), E->getType(), Ext);
continue;
@@ -2536,13 +2536,13 @@ int BoUpSLP::getTreeCost() {
// uses. However, we should not compute the cost of duplicate sequences.
// For example, if we have a build vector (i.e., insertelement sequence)
// that is used by more than one vector instruction, we only need to
- // compute the cost of the insertelement instructions once. The redundent
+ // compute the cost of the insertelement instructions once. The redundant
// instructions will be eliminated by CSE.
//
// We should consider not creating duplicate tree entries for gather
// sequences, and instead add additional edges to the tree representing
// their uses. Since such an approach results in fewer total entries,
- // existing heuristics based on tree size may yeild different results.
+ // existing heuristics based on tree size may yield different results.
//
if (TE.NeedToGather &&
std::any_of(std::next(VectorizableTree.begin(), I + 1),
@@ -3109,14 +3109,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
if (NeedToShuffleReuses) {
// TODO: Merge this shuffle with the ReorderShuffleMask.
- if (!E->ReorderIndices.empty())
+ if (E->ReorderIndices.empty())
Builder.SetInsertPoint(VL0);
- else if (auto *I = dyn_cast<Instruction>(V))
- Builder.SetInsertPoint(I->getParent(),
- std::next(I->getIterator()));
- else
- Builder.SetInsertPoint(&F->getEntryBlock(),
- F->getEntryBlock().getFirstInsertionPt());
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
E->ReuseShuffleIndices, "shuffle");
}
@@ -3649,6 +3643,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
auto &Locs = ExternallyUsedValues[Scalar];
ExternallyUsedValues.insert({Ex, Locs});
ExternallyUsedValues.erase(Scalar);
+ // Required to update internally referenced instructions.
+ Scalar->replaceAllUsesWith(Ex);
continue;
}
@@ -3658,7 +3654,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
if (PHINode *PH = dyn_cast<PHINode>(User)) {
for (int i = 0, e = PH->getNumIncomingValues(); i != e; ++i) {
if (PH->getIncomingValue(i) == Scalar) {
- TerminatorInst *IncomingTerminator =
+ Instruction *IncomingTerminator =
PH->getIncomingBlock(i)->getTerminator();
if (isa<CatchSwitchInst>(IncomingTerminator)) {
Builder.SetInsertPoint(VecI->getParent(),
@@ -3966,7 +3962,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
ScheduleEnd = I->getNextNode();
if (isOneOf(S, I) != I)
CheckSheduleForI(I);
- assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
+ assert(ScheduleEnd && "tried to vectorize a terminator?");
LLVM_DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n");
return true;
}
@@ -4002,7 +3998,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
ScheduleEnd = I->getNextNode();
if (isOneOf(S, I) != I)
CheckSheduleForI(I);
- assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
+ assert(ScheduleEnd && "tried to vectorize a terminator?");
LLVM_DEBUG(dbgs() << "SLP: extend schedule region end to " << *I
<< "\n");
return true;
@@ -4273,7 +4269,7 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
Worklist.push_back(I);
// Traverse the expression tree in bottom-up order looking for loads. If we
- // encounter an instruciton we don't yet handle, we give up.
+ // encounter an instruction we don't yet handle, we give up.
auto MaxWidth = 0u;
auto FoundUnknownInst = false;
while (!Worklist.empty() && !FoundUnknownInst) {
@@ -4846,7 +4842,7 @@ void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
continue;
if (GEP->getType()->isVectorTy())
continue;
- GEPs[GetUnderlyingObject(GEP->getPointerOperand(), *DL)].push_back(GEP);
+ GEPs[GEP->getPointerOperand()].push_back(GEP);
}
}
}
@@ -5132,9 +5128,12 @@ class HorizontalReduction {
/// Checks if the reduction operation can be vectorized.
bool isVectorizable() const {
return LHS && RHS &&
- // We currently only support adds && min/max reductions.
+ // We currently only support add/mul/logical && min/max reductions.
((Kind == RK_Arithmetic &&
- (Opcode == Instruction::Add || Opcode == Instruction::FAdd)) ||
+ (Opcode == Instruction::Add || Opcode == Instruction::FAdd ||
+ Opcode == Instruction::Mul || Opcode == Instruction::FMul ||
+ Opcode == Instruction::And || Opcode == Instruction::Or ||
+ Opcode == Instruction::Xor)) ||
((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
(Kind == RK_Min || Kind == RK_Max)) ||
(Opcode == Instruction::ICmp &&
@@ -5456,7 +5455,7 @@ class HorizontalReduction {
}
};
- Instruction *ReductionRoot = nullptr;
+ WeakTrackingVH ReductionRoot;
/// The operation data of the reduction operation.
OperationData ReductionData;
@@ -5741,7 +5740,7 @@ public:
unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
Value *VectorizedTree = nullptr;
- IRBuilder<> Builder(ReductionRoot);
+ IRBuilder<> Builder(cast<Instruction>(ReductionRoot));
FastMathFlags Unsafe;
Unsafe.setFast();
Builder.setFastMathFlags(Unsafe);
@@ -5750,8 +5749,13 @@ public:
BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
// The same extra argument may be used several time, so log each attempt
// to use it.
- for (auto &Pair : ExtraArgs)
+ for (auto &Pair : ExtraArgs) {
+ assert(Pair.first && "DebugLoc must be set.");
ExternallyUsedValues[Pair.second].push_back(Pair.first);
+ }
+ // The reduction root is used as the insertion point for new instructions,
+ // so set it as externally used to prevent it from being deleted.
+ ExternallyUsedValues[ReductionRoot];
SmallVector<Value *, 16> IgnoreList;
for (auto &V : ReductionOps)
IgnoreList.append(V.begin(), V.end());
@@ -5803,6 +5807,7 @@ public:
Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues);
// Emit a reduction.
+ Builder.SetInsertPoint(cast<Instruction>(ReductionRoot));
Value *ReducedSubTree =
emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
if (VectorizedTree) {
@@ -5829,8 +5834,6 @@ public:
VectorizedTree = VectReductionData.createOp(Builder, "", ReductionOps);
}
for (auto &Pair : ExternallyUsedValues) {
- assert(!Pair.second.empty() &&
- "At least one DebugLoc must be inserted");
// Add each externally used value to the final reduction.
for (auto *I : Pair.second) {
Builder.SetCurrentDebugLocation(I->getDebugLoc());