aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp198
1 files changed, 183 insertions, 15 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 99c265fc5101..15b349f53fd9 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -471,17 +471,36 @@ static bool isValidForAlternation(unsigned Opcode) {
return true;
}
+static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
+ unsigned BaseIndex = 0);
+
+/// Checks if the provided operands of 2 cmp instructions are compatible, i.e.
+/// compatible instructions or constants, or just some other regular values.
+static bool areCompatibleCmpOps(Value *BaseOp0, Value *BaseOp1, Value *Op0,
+ Value *Op1) {
+ return (isConstant(BaseOp0) && isConstant(Op0)) ||
+ (isConstant(BaseOp1) && isConstant(Op1)) ||
+ (!isa<Instruction>(BaseOp0) && !isa<Instruction>(Op0) &&
+ !isa<Instruction>(BaseOp1) && !isa<Instruction>(Op1)) ||
+ getSameOpcode({BaseOp0, Op0}).getOpcode() ||
+ getSameOpcode({BaseOp1, Op1}).getOpcode();
+}
+
/// \returns analysis of the Instructions in \p VL described in
/// InstructionsState, the Opcode that we suppose the whole list
/// could be vectorized even if its structure is diverse.
static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
- unsigned BaseIndex = 0) {
+ unsigned BaseIndex) {
// Make sure these are all Instructions.
if (llvm::any_of(VL, [](Value *V) { return !isa<Instruction>(V); }))
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
bool IsCastOp = isa<CastInst>(VL[BaseIndex]);
bool IsBinOp = isa<BinaryOperator>(VL[BaseIndex]);
+ bool IsCmpOp = isa<CmpInst>(VL[BaseIndex]);
+ CmpInst::Predicate BasePred =
+ IsCmpOp ? cast<CmpInst>(VL[BaseIndex])->getPredicate()
+ : CmpInst::BAD_ICMP_PREDICATE;
unsigned Opcode = cast<Instruction>(VL[BaseIndex])->getOpcode();
unsigned AltOpcode = Opcode;
unsigned AltIndex = BaseIndex;
@@ -514,6 +533,57 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
continue;
}
}
+ } else if (IsCmpOp && isa<CmpInst>(VL[Cnt])) {
+ auto *BaseInst = cast<Instruction>(VL[BaseIndex]);
+ auto *Inst = cast<Instruction>(VL[Cnt]);
+ Type *Ty0 = BaseInst->getOperand(0)->getType();
+ Type *Ty1 = Inst->getOperand(0)->getType();
+ if (Ty0 == Ty1) {
+ Value *BaseOp0 = BaseInst->getOperand(0);
+ Value *BaseOp1 = BaseInst->getOperand(1);
+ Value *Op0 = Inst->getOperand(0);
+ Value *Op1 = Inst->getOperand(1);
+ CmpInst::Predicate CurrentPred =
+ cast<CmpInst>(VL[Cnt])->getPredicate();
+ CmpInst::Predicate SwappedCurrentPred =
+ CmpInst::getSwappedPredicate(CurrentPred);
+ // Check for compatible operands. If the corresponding operands are not
+ // compatible - need to perform alternate vectorization.
+ if (InstOpcode == Opcode) {
+ if (BasePred == CurrentPred &&
+ areCompatibleCmpOps(BaseOp0, BaseOp1, Op0, Op1))
+ continue;
+ if (BasePred == SwappedCurrentPred &&
+ areCompatibleCmpOps(BaseOp0, BaseOp1, Op1, Op0))
+ continue;
+ if (E == 2 &&
+ (BasePred == CurrentPred || BasePred == SwappedCurrentPred))
+ continue;
+ auto *AltInst = cast<CmpInst>(VL[AltIndex]);
+ CmpInst::Predicate AltPred = AltInst->getPredicate();
+ Value *AltOp0 = AltInst->getOperand(0);
+ Value *AltOp1 = AltInst->getOperand(1);
+ // Check if operands are compatible with alternate operands.
+ if (AltPred == CurrentPred &&
+ areCompatibleCmpOps(AltOp0, AltOp1, Op0, Op1))
+ continue;
+ if (AltPred == SwappedCurrentPred &&
+ areCompatibleCmpOps(AltOp0, AltOp1, Op1, Op0))
+ continue;
+ }
+ if (BaseIndex == AltIndex) {
+ assert(isValidForAlternation(Opcode) &&
+ isValidForAlternation(InstOpcode) &&
+ "Cast isn't safe for alternation, logic needs to be updated!");
+ AltIndex = Cnt;
+ continue;
+ }
+ auto *AltInst = cast<CmpInst>(VL[AltIndex]);
+ CmpInst::Predicate AltPred = AltInst->getPredicate();
+ if (BasePred == CurrentPred || BasePred == SwappedCurrentPred ||
+ AltPred == CurrentPred || AltPred == SwappedCurrentPred)
+ continue;
+ }
} else if (InstOpcode == Opcode || InstOpcode == AltOpcode)
continue;
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
@@ -3307,9 +3377,14 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
MapVector<OrdersType, unsigned,
DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>
OrdersUses;
+ // Do the analysis for each tree entry only once, otherwise the order of
+ // the same node my be considered several times, though might be not
+ // profitable.
SmallPtrSet<const TreeEntry *, 4> VisitedOps;
for (const auto &Op : Data.second) {
TreeEntry *OpTE = Op.second;
+ if (!VisitedOps.insert(OpTE).second)
+ continue;
if (!OpTE->ReuseShuffleIndices.empty() ||
(IgnoreReorder && OpTE == VectorizableTree.front().get()))
continue;
@@ -3333,9 +3408,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
} else {
++OrdersUses.insert(std::make_pair(Order, 0)).first->second;
}
- if (VisitedOps.insert(OpTE).second)
- OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second +=
- OpTE->UserTreeIndices.size();
+ OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second +=
+ OpTE->UserTreeIndices.size();
assert(OrdersUses[{}] > 0 && "Counter cannot be less than 0.");
--OrdersUses[{}];
}
@@ -4350,9 +4424,41 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
// Reorder operands if reordering would enable vectorization.
- if (isa<BinaryOperator>(VL0)) {
+ auto *CI = dyn_cast<CmpInst>(VL0);
+ if (isa<BinaryOperator>(VL0) || CI) {
ValueList Left, Right;
- reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
+ if (!CI || all_of(VL, [](Value *V) {
+ return cast<CmpInst>(V)->isCommutative();
+ })) {
+ reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
+ } else {
+ CmpInst::Predicate P0 = CI->getPredicate();
+ CmpInst::Predicate AltP0 = cast<CmpInst>(S.AltOp)->getPredicate();
+ CmpInst::Predicate AltP0Swapped = CmpInst::getSwappedPredicate(AltP0);
+ Value *BaseOp0 = VL0->getOperand(0);
+ Value *BaseOp1 = VL0->getOperand(1);
+ // Collect operands - commute if it uses the swapped predicate or
+ // alternate operation.
+ for (Value *V : VL) {
+ auto *Cmp = cast<CmpInst>(V);
+ Value *LHS = Cmp->getOperand(0);
+ Value *RHS = Cmp->getOperand(1);
+ CmpInst::Predicate CurrentPred = CI->getPredicate();
+ CmpInst::Predicate CurrentPredSwapped =
+ CmpInst::getSwappedPredicate(CurrentPred);
+ if (P0 == AltP0 || P0 == AltP0Swapped) {
+ if ((P0 == CurrentPred &&
+ !areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) ||
+ (P0 == CurrentPredSwapped &&
+ !areCompatibleCmpOps(BaseOp0, BaseOp1, RHS, LHS)))
+ std::swap(LHS, RHS);
+ } else if (!areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) {
+ std::swap(LHS, RHS);
+ }
+ Left.push_back(LHS);
+ Right.push_back(RHS);
+ }
+ }
TE->setOperand(0, Left);
TE->setOperand(1, Right);
buildTree_rec(Left, Depth + 1, {TE, 0});
@@ -5284,7 +5390,8 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
((Instruction::isBinaryOp(E->getOpcode()) &&
Instruction::isBinaryOp(E->getAltOpcode())) ||
(Instruction::isCast(E->getOpcode()) &&
- Instruction::isCast(E->getAltOpcode()))) &&
+ Instruction::isCast(E->getAltOpcode())) ||
+ (isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) &&
"Invalid Shuffle Vector Operand");
InstructionCost ScalarCost = 0;
if (NeedToShuffleReuses) {
@@ -5332,6 +5439,14 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy,
CostKind);
+ } else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
+ VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy,
+ Builder.getInt1Ty(),
+ CI0->getPredicate(), CostKind, VL0);
+ VecCost += TTI->getCmpSelInstrCost(
+ E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
+ cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind,
+ E->getAltOp());
} else {
Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType();
Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
@@ -5348,6 +5463,29 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
[E](Instruction *I) {
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
+ if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) {
+ auto *AltCI0 = cast<CmpInst>(E->getAltOp());
+ auto *CI = cast<CmpInst>(I);
+ CmpInst::Predicate P0 = CI0->getPredicate();
+ CmpInst::Predicate AltP0 = AltCI0->getPredicate();
+ CmpInst::Predicate AltP0Swapped =
+ CmpInst::getSwappedPredicate(AltP0);
+ CmpInst::Predicate CurrentPred = CI->getPredicate();
+ CmpInst::Predicate CurrentPredSwapped =
+ CmpInst::getSwappedPredicate(CurrentPred);
+ if (P0 == AltP0 || P0 == AltP0Swapped) {
+ // Alternate cmps have same/swapped predicate as main cmps but
+ // different order of compatible operands.
+ return !(
+ (P0 == CurrentPred &&
+ areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
+ I->getOperand(0), I->getOperand(1))) ||
+ (P0 == CurrentPredSwapped &&
+ areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
+ I->getOperand(1), I->getOperand(0))));
+ }
+ return CurrentPred != P0 && CurrentPredSwapped != P0;
+ }
return I->getOpcode() == E->getAltOpcode();
},
Mask);
@@ -6830,11 +6968,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
((Instruction::isBinaryOp(E->getOpcode()) &&
Instruction::isBinaryOp(E->getAltOpcode())) ||
(Instruction::isCast(E->getOpcode()) &&
- Instruction::isCast(E->getAltOpcode()))) &&
+ Instruction::isCast(E->getAltOpcode())) ||
+ (isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) &&
"Invalid Shuffle Vector Operand");
Value *LHS = nullptr, *RHS = nullptr;
- if (Instruction::isBinaryOp(E->getOpcode())) {
+ if (Instruction::isBinaryOp(E->getOpcode()) || isa<CmpInst>(VL0)) {
setInsertPointAfterBundle(E);
LHS = vectorizeTree(E->getOperand(0));
RHS = vectorizeTree(E->getOperand(1));
@@ -6854,6 +6993,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS, RHS);
V1 = Builder.CreateBinOp(
static_cast<Instruction::BinaryOps>(E->getAltOpcode()), LHS, RHS);
+ } else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
+ V0 = Builder.CreateCmp(CI0->getPredicate(), LHS, RHS);
+ auto *AltCI = cast<CmpInst>(E->getAltOp());
+ CmpInst::Predicate AltPred = AltCI->getPredicate();
+ unsigned AltIdx =
+ std::distance(E->Scalars.begin(), find(E->Scalars, AltCI));
+ if (AltCI->getOperand(0) != E->getOperand(0)[AltIdx])
+ AltPred = CmpInst::getSwappedPredicate(AltPred);
+ V1 = Builder.CreateCmp(AltPred, LHS, RHS);
} else {
V0 = Builder.CreateCast(
static_cast<Instruction::CastOps>(E->getOpcode()), LHS, VecTy);
@@ -6878,6 +7026,29 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
[E](Instruction *I) {
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
+ if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) {
+ auto *AltCI0 = cast<CmpInst>(E->getAltOp());
+ auto *CI = cast<CmpInst>(I);
+ CmpInst::Predicate P0 = CI0->getPredicate();
+ CmpInst::Predicate AltP0 = AltCI0->getPredicate();
+ CmpInst::Predicate AltP0Swapped =
+ CmpInst::getSwappedPredicate(AltP0);
+ CmpInst::Predicate CurrentPred = CI->getPredicate();
+ CmpInst::Predicate CurrentPredSwapped =
+ CmpInst::getSwappedPredicate(CurrentPred);
+ if (P0 == AltP0 || P0 == AltP0Swapped) {
+ // Alternate cmps have same/swapped predicate as main cmps but
+ // different order of compatible operands.
+ return !(
+ (P0 == CurrentPred &&
+ areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
+ I->getOperand(0), I->getOperand(1))) ||
+ (P0 == CurrentPredSwapped &&
+ areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
+ I->getOperand(1), I->getOperand(0))));
+ }
+ return CurrentPred != P0 && CurrentPredSwapped != P0;
+ }
return I->getOpcode() == E->getAltOpcode();
},
Mask, &OpScalars, &AltScalars);
@@ -7676,11 +7847,8 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
for (ScheduleData *BundleMember = picked; BundleMember;
BundleMember = BundleMember->NextInBundle) {
Instruction *pickedInst = BundleMember->Inst;
- if (pickedInst->getNextNode() != LastScheduledInst) {
- BS->BB->getInstList().remove(pickedInst);
- BS->BB->getInstList().insert(LastScheduledInst->getIterator(),
- pickedInst);
- }
+ if (pickedInst->getNextNode() != LastScheduledInst)
+ pickedInst->moveBefore(LastScheduledInst);
LastScheduledInst = pickedInst;
}
@@ -8444,7 +8612,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
if (R.isTreeTinyAndNotFullyVectorizable())
continue;
R.reorderTopToBottom();
- R.reorderBottomToTop();
+ R.reorderBottomToTop(!isa<InsertElementInst>(Ops.front()));
R.buildExternalUses();
R.computeMinimumValueSizes();