| author | Dimitry Andric <dim@FreeBSD.org> | 2017-01-02 19:17:04 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-01-02 19:17:04 +0000 |
| commit | b915e9e0fc85ba6f398b3fab0db6a81a8913af94 (patch) | |
| tree | 98b8f811c7aff2547cab8642daf372d6c59502fb /lib/Transforms/Vectorize/SLPVectorizer.cpp | |
| parent | 6421cca32f69ac849537a3cff78c352195e99f1b (diff) | |
Diffstat (limited to 'lib/Transforms/Vectorize/SLPVectorizer.cpp')

| -rw-r--r-- | lib/Transforms/Vectorize/SLPVectorizer.cpp | 716 |

1 file changed, 476 insertions(+), 240 deletions(-)
```diff
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index fb94f7924ee0..bcaa8439cffa 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -115,22 +115,22 @@ static bool isValidElementType(Type *Ty) {
       !Ty->isPPC_FP128Ty();
 }
 
-/// \returns the parent basic block if all of the instructions in \p VL
-/// are in the same block or null otherwise.
-static BasicBlock *getSameBlock(ArrayRef<Value *> VL) {
+/// \returns true if all of the instructions in \p VL are in the same block or
+/// false otherwise.
+static bool allSameBlock(ArrayRef<Value *> VL) {
   Instruction *I0 = dyn_cast<Instruction>(VL[0]);
   if (!I0)
-    return nullptr;
+    return false;
   BasicBlock *BB = I0->getParent();
   for (int i = 1, e = VL.size(); i < e; i++) {
     Instruction *I = dyn_cast<Instruction>(VL[i]);
     if (!I)
-      return nullptr;
+      return false;
     if (BB != I->getParent())
-      return nullptr;
+      return false;
   }
-  return BB;
+  return true;
 }
 
 /// \returns True if all of the values in \p VL are constants.
@@ -211,12 +211,12 @@ static unsigned getSameOpcode(ArrayRef<Value *> VL) {
 /// of each scalar operation (VL) that will be converted into a vector (I).
 /// Flag set: NSW, NUW, exact, and all of fast-math.
 static void propagateIRFlags(Value *I, ArrayRef<Value *> VL) {
-  if (auto *VecOp = dyn_cast<BinaryOperator>(I)) {
-    if (auto *Intersection = dyn_cast<BinaryOperator>(VL[0])) {
+  if (auto *VecOp = dyn_cast<Instruction>(I)) {
+    if (auto *Intersection = dyn_cast<Instruction>(VL[0])) {
       // Intersection is initialized to the 0th scalar,
       // so start counting from index '1'.
       for (int i = 1, e = VL.size(); i < e; ++i) {
-        if (auto *Scalar = dyn_cast<BinaryOperator>(VL[i]))
+        if (auto *Scalar = dyn_cast<Instruction>(VL[i]))
           Intersection->andIRFlags(Scalar);
       }
       VecOp->copyIRFlags(Intersection);
@@ -224,15 +224,15 @@ static void propagateIRFlags(Value *I, ArrayRef<Value *> VL) {
   }
 }
 
-/// \returns The type that all of the values in \p VL have or null if there
-/// are different types.
-static Type* getSameType(ArrayRef<Value *> VL) {
+/// \returns true if all of the values in \p VL have the same type or false
+/// otherwise.
+static bool allSameType(ArrayRef<Value *> VL) {
   Type *Ty = VL[0]->getType();
   for (int i = 1, e = VL.size(); i < e; i++)
     if (VL[i]->getType() != Ty)
-      return nullptr;
+      return false;
 
-  return Ty;
+  return true;
 }
 
 /// \returns True if Extract{Value,Element} instruction extracts element Idx.
@@ -393,6 +393,10 @@ public:
   /// \returns number of elements in vector if isomorphism exists, 0 otherwise.
   unsigned canMapToVector(Type *T, const DataLayout &DL) const;
 
+  /// \returns True if the VectorizableTree is both tiny and not fully
+  /// vectorizable. We do not vectorize such trees.
+  bool isTreeTinyAndNotFullyVectorizable();
+
 private:
   struct TreeEntry;
 
@@ -883,10 +887,10 @@ private:
   /// List of users to ignore during scheduling and that don't need extracting.
   ArrayRef<Value *> UserIgnoreList;
 
-  // Number of load-bundles, which contain consecutive loads.
+  // Number of load bundles that contain consecutive loads.
   int NumLoadsWantToKeepOrder;
 
-  // Number of load-bundles of size 2, which are consecutive loads if reversed.
+  // Number of load bundles that contain consecutive loads in reversed order.
   int NumLoadsWantToChangeOrder;
 
   // Analysis and block reference.
@@ -906,8 +910,11 @@ private:
   IRBuilder<> Builder;
 
   /// A map of scalar integer values to the smallest bit width with which they
-  /// can legally be represented.
-  MapVector<Value *, uint64_t> MinBWs;
+  /// can legally be represented. The values map to (width, signed) pairs,
+  /// where "width" indicates the minimum bit width and "signed" is True if the
+  /// value must be signed-extended, rather than zero-extended, back to its
+  /// original width.
+  MapVector<Value *, std::pair<uint64_t, bool>> MinBWs;
 };
 
 } // end namespace llvm
 
@@ -917,7 +924,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
                         ArrayRef<Value *> UserIgnoreLst) {
   deleteTree();
   UserIgnoreList = UserIgnoreLst;
-  if (!getSameType(Roots))
+  if (!allSameType(Roots))
     return;
   buildTree_rec(Roots, 0);
 
@@ -958,8 +965,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
       }
 
       // Ignore users in the user ignore list.
-      if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UserInst) !=
-          UserIgnoreList.end())
+      if (is_contained(UserIgnoreList, UserInst))
        continue;
 
      DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " <<
@@ -972,9 +978,8 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
 
 void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
-  bool SameTy = allConstant(VL) || getSameType(VL); (void)SameTy;
   bool isAltShuffle = false;
-  assert(SameTy && "Invalid types!");
+  assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
 
   if (Depth == RecursionMaxDepth) {
     DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
@@ -1007,7 +1012,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
   }
 
   // If all of the operands are identical or constant we have a simple solution.
-  if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) || !Opcode) {
+  if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !Opcode) {
     DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
     newTreeEntry(VL, false);
     return;
@@ -1159,7 +1164,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
        DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
        return;
      }
-      // Check if the loads are consecutive or of we need to swizzle them.
+
+      // Make sure all loads in the bundle are simple - we can't vectorize
+      // atomic or volatile loads.
      for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
        LoadInst *L = cast<LoadInst>(VL[i]);
        if (!L->isSimple()) {
@@ -1168,20 +1175,47 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
          BS.cancelScheduling(VL);
          newTreeEntry(VL, false);
          DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
          return;
        }
+      }
 
+      // Check if the loads are consecutive, reversed, or neither.
+      // TODO: What we really want is to sort the loads, but for now, check
+      // the two likely directions.
+      bool Consecutive = true;
+      bool ReverseConsecutive = true;
+      for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
        if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
-          if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], *DL, *SE)) {
-            ++NumLoadsWantToChangeOrder;
-          }
-          BS.cancelScheduling(VL);
-          newTreeEntry(VL, false);
-          DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
-          return;
+          Consecutive = false;
+          break;
+        } else {
+          ReverseConsecutive = false;
        }
      }
-      ++NumLoadsWantToKeepOrder;
-      newTreeEntry(VL, true);
-      DEBUG(dbgs() << "SLP: added a vector of loads.\n");
+
+      if (Consecutive) {
+        ++NumLoadsWantToKeepOrder;
+        newTreeEntry(VL, true);
+        DEBUG(dbgs() << "SLP: added a vector of loads.\n");
+        return;
+      }
+
+      // If none of the load pairs were consecutive when checked in order,
+      // check the reverse order.
+      if (ReverseConsecutive)
+        for (unsigned i = VL.size() - 1; i > 0; --i)
+          if (!isConsecutiveAccess(VL[i], VL[i - 1], *DL, *SE)) {
+            ReverseConsecutive = false;
+            break;
+          }
+
+      BS.cancelScheduling(VL);
+      newTreeEntry(VL, false);
+
+      if (ReverseConsecutive) {
+        ++NumLoadsWantToChangeOrder;
+        DEBUG(dbgs() << "SLP: Gathering reversed loads.\n");
+      } else {
+        DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
+      }
      return;
    }
    case Instruction::ZExt:
@@ -1541,8 +1575,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
   // If we have computed a smaller type for the expression, update VecTy so
   // that the costs will be accurate.
   if (MinBWs.count(VL[0]))
-    VecTy = VectorType::get(IntegerType::get(F->getContext(), MinBWs[VL[0]]),
-                            VL.size());
+    VecTy = VectorType::get(
+        IntegerType::get(F->getContext(), MinBWs[VL[0]].first), VL.size());
 
   if (E->NeedToGather) {
     if (allConstant(VL))
@@ -1553,7 +1587,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
     return getGatherCost(E->Scalars);
   }
   unsigned Opcode = getSameOpcode(VL);
-  assert(Opcode && getSameType(VL) && getSameBlock(VL) && "Invalid VL");
+  assert(Opcode && allSameType(VL) && allSameBlock(VL) && "Invalid VL");
   Instruction *VL0 = cast<Instruction>(VL[0]);
   switch (Opcode) {
    case Instruction::PHI: {
@@ -1762,7 +1796,10 @@ bool BoUpSLP::isFullyVectorizableTinyTree() {
   DEBUG(dbgs() << "SLP: Check whether the tree with height " <<
        VectorizableTree.size() << " is fully vectorizable .\n");
 
-  // We only handle trees of height 2.
+  // We only handle trees of heights 1 and 2.
+  if (VectorizableTree.size() == 1 && !VectorizableTree[0].NeedToGather)
+    return true;
+
   if (VectorizableTree.size() != 2)
     return false;
 
@@ -1779,6 +1816,27 @@ bool BoUpSLP::isFullyVectorizableTinyTree() {
   return true;
 }
 
+bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() {
+
+  // We can vectorize the tree if its size is greater than or equal to the
+  // minimum size specified by the MinTreeSize command line option.
+  if (VectorizableTree.size() >= MinTreeSize)
+    return false;
+
+  // If we have a tiny tree (a tree whose size is less than MinTreeSize), we
+  // can vectorize it if we can prove it fully vectorizable.
+  if (isFullyVectorizableTinyTree())
+    return false;
+
+  assert(VectorizableTree.empty()
+             ? ExternalUses.empty()
+             : true && "We shouldn't have any external users");
+
+  // Otherwise, we can't vectorize the tree. It is both tiny and not fully
+  // vectorizable.
+  return true;
+}
+
 int BoUpSLP::getSpillCost() {
   // Walk from the bottom of the tree to the top, tracking which values are
   // live. When we see a call instruction that is not part of our tree,
@@ -1816,9 +1874,9 @@ int BoUpSLP::getSpillCost() {
    );
 
    // Now find the sequence of instructions between PrevInst and Inst.
-    BasicBlock::reverse_iterator InstIt(Inst->getIterator()),
-        PrevInstIt(PrevInst->getIterator());
-    --PrevInstIt;
+    BasicBlock::reverse_iterator InstIt = ++Inst->getIterator().getReverse(),
+                                 PrevInstIt =
+                                     PrevInst->getIterator().getReverse();
 
    while (InstIt != PrevInstIt) {
      if (PrevInstIt == PrevInst->getParent()->rend()) {
        PrevInstIt = Inst->getParent()->rbegin();
@@ -1846,14 +1904,6 @@ int BoUpSLP::getTreeCost() {
   DEBUG(dbgs() << "SLP: Calculating cost for tree of size " <<
        VectorizableTree.size() << ".\n");
 
-  // We only vectorize tiny trees if it is fully vectorizable.
-  if (VectorizableTree.size() < MinTreeSize && !isFullyVectorizableTinyTree()) {
-    if (VectorizableTree.empty()) {
-      assert(!ExternalUses.size() && "We should not have any external users");
-    }
-    return INT_MAX;
-  }
-
   unsigned BundleWidth = VectorizableTree[0].Scalars.size();
 
   for (TreeEntry &TE : VectorizableTree) {
@@ -1882,10 +1932,12 @@ int BoUpSLP::getTreeCost() {
    auto *VecTy = VectorType::get(EU.Scalar->getType(), BundleWidth);
    auto *ScalarRoot = VectorizableTree[0].Scalars[0];
    if (MinBWs.count(ScalarRoot)) {
-      auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot]);
+      auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
+      auto Extend =
+          MinBWs[ScalarRoot].second ? Instruction::SExt : Instruction::ZExt;
      VecTy = VectorType::get(MinTy, BundleWidth);
-      ExtractCost += TTI->getExtractWithExtendCost(
-          Instruction::SExt, EU.Scalar->getType(), VecTy, EU.Lane);
+      ExtractCost += TTI->getExtractWithExtendCost(Extend, EU.Scalar->getType(),
+                                                   VecTy, EU.Lane);
    } else {
      ExtractCost +=
          TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, EU.Lane);
@@ -2182,7 +2234,7 @@ void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
 
   // Set the insertion point after the last instruction in the bundle. Set the
   // debug location to Front.
-  Builder.SetInsertPoint(BB, next(BasicBlock::iterator(LastInst)));
+  Builder.SetInsertPoint(BB, ++LastInst->getIterator());
   Builder.SetCurrentDebugLocation(Front->getDebugLoc());
 }
 
@@ -2383,6 +2435,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
        V = Builder.CreateICmp(P0, L, R);
 
      E->VectorizedValue = V;
+      propagateIRFlags(E->VectorizedValue, E->Scalars);
      ++NumVectorInstructions;
      return V;
    }
@@ -2593,6 +2646,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
        ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0));
 
      E->VectorizedValue = V;
+      propagateIRFlags(E->VectorizedValue, E->Scalars);
      ++NumVectorInstructions;
      return V;
    }
@@ -2669,7 +2723,7 @@ Value *BoUpSLP::vectorizeTree() {
    if (auto *I = dyn_cast<Instruction>(VectorRoot))
      Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
    auto BundleWidth = VectorizableTree[0].Scalars.size();
-    auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot]);
+    auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
    auto *VecTy = VectorType::get(MinTy, BundleWidth);
    auto *Trunc = Builder.CreateTrunc(VectorRoot, VecTy);
    VectorizableTree[0].VectorizedValue = Trunc;
@@ -2677,6 +2731,16 @@ Value *BoUpSLP::vectorizeTree() {
 
   DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size() << " values .\n");
 
+  // If necessary, sign-extend or zero-extend ScalarRoot to the larger type
+  // specified by ScalarType.
+  auto extend = [&](Value *ScalarRoot, Value *Ex, Type *ScalarType) {
+    if (!MinBWs.count(ScalarRoot))
+      return Ex;
+    if (MinBWs[ScalarRoot].second)
+      return Builder.CreateSExt(Ex, ScalarType);
+    return Builder.CreateZExt(Ex, ScalarType);
+  };
+
   // Extract all of the elements with the external uses.
   for (const auto &ExternalUse : ExternalUses) {
    Value *Scalar = ExternalUse.Scalar;
@@ -2684,8 +2748,7 @@ Value *BoUpSLP::vectorizeTree() {
 
    // Skip users that we already RAUW. This happens when one instruction
    // has multiple uses of the same value.
-    if (std::find(Scalar->user_begin(), Scalar->user_end(), User) ==
-        Scalar->user_end())
+    if (!is_contained(Scalar->users(), User))
      continue;
    assert(ScalarToTreeEntry.count(Scalar) && "Invalid scalar");
 
@@ -2712,8 +2775,7 @@ Value *BoUpSLP::vectorizeTree() {
            Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
          }
          Value *Ex = Builder.CreateExtractElement(Vec, Lane);
-          if (MinBWs.count(ScalarRoot))
-            Ex = Builder.CreateSExt(Ex, Scalar->getType());
+          Ex = extend(ScalarRoot, Ex, Scalar->getType());
          CSEBlocks.insert(PH->getIncomingBlock(i));
          PH->setOperand(i, Ex);
        }
@@ -2721,16 +2783,14 @@ Value *BoUpSLP::vectorizeTree() {
      } else {
        Builder.SetInsertPoint(cast<Instruction>(User));
        Value *Ex = Builder.CreateExtractElement(Vec, Lane);
-        if (MinBWs.count(ScalarRoot))
-          Ex = Builder.CreateSExt(Ex, Scalar->getType());
+        Ex = extend(ScalarRoot, Ex, Scalar->getType());
        CSEBlocks.insert(cast<Instruction>(User)->getParent());
        User->replaceUsesOfWith(Scalar, Ex);
      }
    } else {
      Builder.SetInsertPoint(&F->getEntryBlock().front());
      Value *Ex = Builder.CreateExtractElement(Vec, Lane);
-      if (MinBWs.count(ScalarRoot))
-        Ex = Builder.CreateSExt(Ex, Scalar->getType());
+      Ex = extend(ScalarRoot, Ex, Scalar->getType());
      CSEBlocks.insert(&F->getEntryBlock());
      User->replaceUsesOfWith(Scalar, Ex);
    }
@@ -2759,8 +2819,7 @@ Value *BoUpSLP::vectorizeTree() {
 
        assert((ScalarToTreeEntry.count(U) ||
                // It is legal to replace users in the ignorelist by undef.
-                (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), U) !=
-                 UserIgnoreList.end())) &&
+                is_contained(UserIgnoreList, U)) &&
               "Replacing out-of-tree value with undef");
      }
 #endif
@@ -2853,7 +2912,7 @@ void BoUpSLP::optimizeGatherSequence() {
      }
    }
    if (In) {
-      assert(std::find(Visited.begin(), Visited.end(), In) == Visited.end());
+      assert(!is_contained(Visited, In));
      Visited.push_back(In);
    }
   }
@@ -2994,9 +3053,10 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
   }
   // Search up and down at the same time, because we don't know if the new
   // instruction is above or below the existing scheduling region.
-  BasicBlock::reverse_iterator UpIter(ScheduleStart->getIterator());
+  BasicBlock::reverse_iterator UpIter =
+      ++ScheduleStart->getIterator().getReverse();
   BasicBlock::reverse_iterator UpperEnd = BB->rend();
-  BasicBlock::iterator DownIter(ScheduleEnd);
+  BasicBlock::iterator DownIter = ScheduleEnd->getIterator();
   BasicBlock::iterator LowerEnd = BB->end();
   for (;;) {
    if (++ScheduleRegionSize > ScheduleRegionSizeLimit) {
@@ -3451,6 +3511,11 @@ void BoUpSLP::computeMinimumValueSizes() {
        Mask.getBitWidth() - Mask.countLeadingZeros(), MaxBitWidth);
   }
 
+  // True if the roots can be zero-extended back to their original type, rather
+  // than sign-extended. We know that if the leading bits are not demanded, we
+  // can safely zero-extend. So we initialize IsKnownPositive to True.
+  bool IsKnownPositive = true;
+
   // If all the bits of the roots are demanded, we can try a little harder to
   // compute a narrower type. This can happen, for example, if the roots are
   // getelementptr indices. InstCombine promotes these indices to the pointer
@@ -3462,11 +3527,41 @@ void BoUpSLP::computeMinimumValueSizes() {
   // compute the number of high-order bits we can truncate.
   if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType())) {
    MaxBitWidth = 8u;
+
+    // Determine if the sign bit of all the roots is known to be zero. If not,
+    // IsKnownPositive is set to False.
+    IsKnownPositive = all_of(TreeRoot, [&](Value *R) {
+      bool KnownZero = false;
+      bool KnownOne = false;
+      ComputeSignBit(R, KnownZero, KnownOne, *DL);
+      return KnownZero;
+    });
+
+    // Determine the maximum number of bits required to store the scalar
+    // values.
    for (auto *Scalar : ToDemote) {
      auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, AC, 0, DT);
      auto NumTypeBits = DL->getTypeSizeInBits(Scalar->getType());
      MaxBitWidth = std::max<unsigned>(NumTypeBits - NumSignBits, MaxBitWidth);
    }
+
+    // If we can't prove that the sign bit is zero, we must add one to the
+    // maximum bit width to account for the unknown sign bit. This preserves
+    // the existing sign bit so we can safely sign-extend the root back to the
+    // original type. Otherwise, if we know the sign bit is zero, we will
+    // zero-extend the root instead.
+    //
+    // FIXME: This is somewhat suboptimal, as there will be cases where adding
+    //        one to the maximum bit width will yield a larger-than-necessary
+    //        type. In general, we need to add an extra bit only if we can't
+    //        prove that the upper bit of the original type is equal to the
+    //        upper bit of the proposed smaller type. If these two bits are the
+    //        same (either zero or one) we know that sign-extending from the
+    //        smaller type will result in the same value. Here, since we can't
+    //        yet prove this, we are just making the proposed smaller type
+    //        larger to ensure correctness.
+    if (!IsKnownPositive)
+      ++MaxBitWidth;
   }
 
   // Round MaxBitWidth up to the next power-of-two.
@@ -3486,7 +3581,7 @@ void BoUpSLP::computeMinimumValueSizes() {
 
   // Finally, map the values we can demote to the maximum bit with we computed.
   for (auto *Scalar : ToDemote)
-    MinBWs[Scalar] = MaxBitWidth;
+    MinBWs[Scalar] = std::make_pair(MaxBitWidth, !IsKnownPositive);
 }
 
 namespace {
@@ -3642,8 +3737,7 @@ static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH,
   return !std::equal(VL.begin(), VL.end(), VH.begin());
 }
 
-bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain,
-                                            int CostThreshold, BoUpSLP &R,
+bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
                                            unsigned VecRegSize) {
   unsigned ChainLen = Chain.size();
   DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen
@@ -3672,12 +3766,15 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain,
 
    ArrayRef<Value *> Operands = Chain.slice(i, VF);
    R.buildTree(Operands);
+    if (R.isTreeTinyAndNotFullyVectorizable())
+      continue;
+
    R.computeMinimumValueSizes();
 
    int Cost = R.getTreeCost();
 
    DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
-    if (Cost < CostThreshold) {
+    if (Cost < -SLPCostThreshold) {
      DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
      R.vectorizeTree();
 
@@ -3691,7 +3788,7 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain,
 }
 
 bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
-                                        int costThreshold, BoUpSLP &R) {
+                                        BoUpSLP &R) {
   SetVector<StoreInst *> Heads, Tails;
   SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
 
@@ -3746,8 +3843,9 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
 
    // FIXME: Is division-by-2 the correct step? Should we assert that the
    // register size is a power-of-2?
-    for (unsigned Size = R.getMaxVecRegSize(); Size >= R.getMinVecRegSize(); Size /= 2) {
-      if (vectorizeStoreChain(Operands, costThreshold, R, Size)) {
+    for (unsigned Size = R.getMaxVecRegSize(); Size >= R.getMinVecRegSize();
+         Size /= 2) {
+      if (vectorizeStoreChain(Operands, R, Size)) {
        // Mark the vectorized stores so that we don't vectorize them again.
        VectorizedStores.insert(Operands.begin(), Operands.end());
        Changed = true;
@@ -3805,11 +3903,12 @@ bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
 
 bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
                                           ArrayRef<Value *> BuildVector,
-                                           bool allowReorder) {
+                                           bool AllowReorder) {
   if (VL.size() < 2)
    return false;
 
-  DEBUG(dbgs() << "SLP: Vectorizing a list of length = " << VL.size() << ".\n");
+  DEBUG(dbgs() << "SLP: Trying to vectorize a list of length = " << VL.size()
+               << ".\n");
 
   // Check that all of the parts are scalar instructions of the same type.
   Instruction *I0 = dyn_cast<Instruction>(VL[0]);
@@ -3818,10 +3917,11 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
 
   unsigned Opcode0 = I0->getOpcode();
 
-  // FIXME: Register size should be a parameter to this function, so we can
-  // try different vectorization factors.
   unsigned Sz = R.getVectorElementSize(I0);
-  unsigned VF = R.getMinVecRegSize() / Sz;
+  unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
+  unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
+  if (MaxVF < 2)
+    return false;
 
   for (Value *V : VL) {
    Type *Ty = V->getType();
@@ -3837,70 +3937,89 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
   // Keep track of values that were deleted by vectorizing in the loop below.
   SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());
 
-  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
-    unsigned OpsWidth = 0;
+  unsigned NextInst = 0, MaxInst = VL.size();
+  for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF;
+       VF /= 2) {
+    // No actual vectorization should happen, if number of parts is the same as
+    // provided vectorization factor (i.e. the scalar type is used for vector
+    // code during codegen).
+    auto *VecTy = VectorType::get(VL[0]->getType(), VF);
+    if (TTI->getNumberOfParts(VecTy) == VF)
+      continue;
+    for (unsigned I = NextInst; I < MaxInst; ++I) {
+      unsigned OpsWidth = 0;
 
-    if (i + VF > e)
-      OpsWidth = e - i;
-    else
-      OpsWidth = VF;
+      if (I + VF > MaxInst)
+        OpsWidth = MaxInst - I;
+      else
+        OpsWidth = VF;
 
-    if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
-      break;
+      if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
+        break;
 
-    // Check that a previous iteration of this loop did not delete the Value.
+      // Check that a previous iteration of this loop did not delete the Value.
+      if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth))
+        continue;
-    DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
-                 << "\n");
-    ArrayRef<Value *> Ops = VL.slice(i, OpsWidth);
-
-    ArrayRef<Value *> BuildVectorSlice;
-    if (!BuildVector.empty())
-      BuildVectorSlice = BuildVector.slice(i, OpsWidth);
-
-    R.buildTree(Ops, BuildVectorSlice);
-    // TODO: check if we can allow reordering also for other cases than
-    // tryToVectorizePair()
-    if (allowReorder && R.shouldReorder()) {
-      assert(Ops.size() == 2);
-      assert(BuildVectorSlice.empty());
-      Value *ReorderedOps[] = { Ops[1], Ops[0] };
-      R.buildTree(ReorderedOps, None);
-    }
-    R.computeMinimumValueSizes();
-    int Cost = R.getTreeCost();
+
+      DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
+                   << "\n");
+      ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
+
+      ArrayRef<Value *> BuildVectorSlice;
+      if (!BuildVector.empty())
+        BuildVectorSlice = BuildVector.slice(I, OpsWidth);
+
+      R.buildTree(Ops, BuildVectorSlice);
+      // TODO: check if we can allow reordering for more cases.
+      if (AllowReorder && R.shouldReorder()) {
+        // Conceptually, there is nothing actually preventing us from trying to
+        // reorder a larger list. In fact, we do exactly this when vectorizing
+        // reductions. However, at this point, we only expect to get here from
+        // tryToVectorizePair().
+        assert(Ops.size() == 2);
+        assert(BuildVectorSlice.empty());
+        Value *ReorderedOps[] = {Ops[1], Ops[0]};
+        R.buildTree(ReorderedOps, None);
+      }
+      if (R.isTreeTinyAndNotFullyVectorizable())
+        continue;
 
-    if (Cost < -SLPCostThreshold) {
-      DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
-      Value *VectorizedRoot = R.vectorizeTree();
-
-      // Reconstruct the build vector by extracting the vectorized root. This
-      // way we handle the case where some elements of the vector are undefined.
-      // (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2))
-      if (!BuildVectorSlice.empty()) {
-        // The insert point is the last build vector instruction. The vectorized
-        // root will precede it. This guarantees that we get an instruction. The
-        // vectorized tree could have been constant folded.
-        Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
-        unsigned VecIdx = 0;
-        for (auto &V : BuildVectorSlice) {
-          IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
-                                      ++BasicBlock::iterator(InsertAfter));
-          Instruction *I = cast<Instruction>(V);
-          assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I));
-          Instruction *Extract = cast<Instruction>(Builder.CreateExtractElement(
-              VectorizedRoot, Builder.getInt32(VecIdx++)));
-          I->setOperand(1, Extract);
-          I->removeFromParent();
-          I->insertAfter(Extract);
-          InsertAfter = I;
+      R.computeMinimumValueSizes();
+      int Cost = R.getTreeCost();
+
+      if (Cost < -SLPCostThreshold) {
+        DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
+        Value *VectorizedRoot = R.vectorizeTree();
+
+        // Reconstruct the build vector by extracting the vectorized root. This
+        // way we handle the case where some elements of the vector are
+        // undefined.
+        // (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2))
+        if (!BuildVectorSlice.empty()) {
+          // The insert point is the last build vector instruction. The
+          // vectorized root will precede it. This guarantees that we get an
+          // instruction. The vectorized tree could have been constant folded.
+          Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
+          unsigned VecIdx = 0;
+          for (auto &V : BuildVectorSlice) {
+            IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
+                                        ++BasicBlock::iterator(InsertAfter));
+            Instruction *I = cast<Instruction>(V);
+            assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I));
+            Instruction *Extract =
+                cast<Instruction>(Builder.CreateExtractElement(
+                    VectorizedRoot, Builder.getInt32(VecIdx++)));
+            I->setOperand(1, Extract);
+            I->removeFromParent();
+            I->insertAfter(Extract);
+            InsertAfter = I;
+          }
        }
+        // Move to the next bundle.
+        I += VF - 1;
+        NextInst = I + 1;
+        Changed = true;
      }
-      // Move to the next bundle.
-      i += VF - 1;
-      Changed = true;
    }
   }
 
@@ -3911,36 +4030,40 @@ bool SLPVectorizerPass::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
   if (!V)
    return false;
 
+  Value *P = V->getParent();
+
+  // Vectorize in current basic block only.
+  auto *Op0 = dyn_cast<Instruction>(V->getOperand(0));
+  auto *Op1 = dyn_cast<Instruction>(V->getOperand(1));
+  if (!Op0 || !Op1 || Op0->getParent() != P || Op1->getParent() != P)
+    return false;
+
   // Try to vectorize V.
-  if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
+  if (tryToVectorizePair(Op0, Op1, R))
    return true;
 
-  BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
-  BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
+  auto *A = dyn_cast<BinaryOperator>(Op0);
+  auto *B = dyn_cast<BinaryOperator>(Op1);
   // Try to skip B.
   if (B && B->hasOneUse()) {
-    BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
-    BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
-    if (tryToVectorizePair(A, B0, R)) {
+    auto *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
+    auto *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
+    if (B0 && B0->getParent() == P && tryToVectorizePair(A, B0, R))
      return true;
-    }
-    if (tryToVectorizePair(A, B1, R)) {
+    if (B1 && B1->getParent() == P && tryToVectorizePair(A, B1, R))
      return true;
-    }
   }
 
   // Try to skip A.
   if (A && A->hasOneUse()) {
-    BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
-    BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
-    if (tryToVectorizePair(A0, B, R)) {
+    auto *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
+    auto *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
+    if (A0 && A0->getParent() == P && tryToVectorizePair(A0, B, R))
      return true;
-    }
-    if (tryToVectorizePair(A1, B, R)) {
+    if (A1 && A1->getParent() == P && tryToVectorizePair(A1, B, R))
      return true;
-    }
   }
-  return 0;
+  return false;
 }
 
 /// \brief Generate a shuffle mask to be used in a reduction tree.
@@ -3973,7 +4096,7 @@ static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx,
   return ConstantVector::get(ShuffleMask);
 }
 
-
+namespace {
 /// Model horizontal reductions.
 ///
 /// A horizontal reduction is a tree of reduction operations (currently add and
@@ -4006,7 +4129,14 @@ class HorizontalReduction {
   SmallVector<Value *, 32> ReducedVals;
 
   BinaryOperator *ReductionRoot;
-  PHINode *ReductionPHI;
+  // After a successful horizontal reduction vectorization attempt for a PHI
+  // node, the vectorizer tries to update the root binary op by combining the
+  // vectorized tree with the ReductionPHI node. But during vectorization this
+  // ReductionPHI can itself be vectorized and replaced by an undef value,
+  // while the instruction itself is marked for deletion. Such a 'marked for
+  // deletion' PHI node can then be used in the new binary operation, causing a
+  // "Use still stuck around after Def is destroyed" crash upon PHI node
+  // deletion.
+  WeakVH ReductionPHI;
 
   /// The opcode of the reduction.
   unsigned ReductionOpcode;
@@ -4025,14 +4155,13 @@ public:
   unsigned MinVecRegSize;
 
   HorizontalReduction(unsigned MinVecRegSize)
-      : ReductionRoot(nullptr), ReductionPHI(nullptr), ReductionOpcode(0),
-        ReducedValueOpcode(0), IsPairwiseReduction(false), ReduxWidth(0),
+      : ReductionRoot(nullptr), ReductionOpcode(0), ReducedValueOpcode(0),
+        IsPairwiseReduction(false), ReduxWidth(0),
        MinVecRegSize(MinVecRegSize) {}
 
   /// \brief Try to find a reduction tree.
   bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B) {
-    assert((!Phi ||
-            std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) &&
+    assert((!Phi || is_contained(Phi->operands(), B)) &&
           "Thi phi needs to use the binary operator");
 
    // We could have a initial reductions that is not an add.
@@ -4113,12 +4242,21 @@ public:
      // Visit left or right.
      Value *NextV = TreeN->getOperand(EdgeToVist);
 
-      // We currently only allow BinaryOperator's and SelectInst's as reduction
-      // values in our tree.
-      if (isa<BinaryOperator>(NextV) || isa<SelectInst>(NextV))
-        Stack.push_back(std::make_pair(cast<Instruction>(NextV), 0));
-      else if (NextV != Phi)
+      if (NextV != Phi) {
+        auto *I = dyn_cast<Instruction>(NextV);
+        // Continue the analysis if the next operand is a reduction operation
+        // or (possibly) a reduced value. If the reduced value opcode is not
+        // set yet, the first operation encountered that differs from the
+        // reduction operation determines the reduced value class.
+        if (I && (!ReducedValueOpcode || I->getOpcode() == ReducedValueOpcode ||
+                  I->getOpcode() == ReductionOpcode)) {
+          if (!ReducedValueOpcode && I->getOpcode() != ReductionOpcode)
+            ReducedValueOpcode = I->getOpcode();
+          Stack.push_back(std::make_pair(I, 0));
+          continue;
+        }
        return false;
+      }
    }
    return true;
   }
@@ -4141,7 +4279,15 @@ public:
 
    unsigned i = 0;
    for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
-      V.buildTree(makeArrayRef(&ReducedVals[i], ReduxWidth), ReductionOps);
+      auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth);
+      V.buildTree(VL, ReductionOps);
+      if (V.shouldReorder()) {
+        SmallVector<Value *, 8> Reversed(VL.rbegin(), VL.rend());
+        V.buildTree(Reversed, ReductionOps);
+      }
+      if (V.isTreeTinyAndNotFullyVectorizable())
+        continue;
+
      V.computeMinimumValueSizes();
 
      // Estimate cost.
@@ -4175,7 +4321,7 @@ public:
                                     ReducedVals[i]);
    }
    // Update users.
-    if (ReductionPHI) {
+    if (ReductionPHI && !isa<UndefValue>(ReductionPHI)) {
      assert(ReductionRoot && "Need a reduction operation");
      ReductionRoot->setOperand(0, VectorizedTree);
      ReductionRoot->setOperand(1, ReductionPHI);
@@ -4202,7 +4348,8 @@ private:
    int VecReduxCost = IsPairwiseReduction ? PairwiseRdxCost : SplittingRdxCost;
    int ScalarReduxCost =
-        ReduxWidth * TTI->getArithmeticInstrCost(ReductionOpcode, VecTy);
+        (ReduxWidth - 1) *
+        TTI->getArithmeticInstrCost(ReductionOpcode, ScalarTy);
 
    DEBUG(dbgs() << "SLP: Adding cost " << VecReduxCost - ScalarReduxCost
                 << " for reduction that starts with " << *FirstReducedVal
@@ -4254,6 +4401,7 @@ private:
    return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
   }
 };
+} // end anonymous namespace
 
 /// \brief Recognize construction of vectors like
 ///  %ra = insertelement <4 x float> undef, float %s0, i32 0
@@ -4354,7 +4502,7 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
    return nullptr;
 
   // There is a loop latch, return the incoming value if it comes from
-  // that. This reduction pattern occassionaly turns up.
+  // that. This reduction pattern occasionally turns up.
   if (P->getIncomingBlock(0) == BBLatch) {
    Rdx = P->getIncomingValue(0);
   } else if (P->getIncomingBlock(1) == BBLatch) {
@@ -4367,29 +4515,143 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
   return nullptr;
 }
 
+namespace {
+/// Tracks an instruction and its children.
+class WeakVHWithLevel final : public CallbackVH {
+  /// Operand index of the instruction currently being analyzed.
+  unsigned Level = 0;
+  /// Is this the instruction that should be vectorized, or are we now
+  /// processing children (i.e. operands of this instruction) for potential
+  /// vectorization?
+  bool IsInitial = true;
+
+public:
+  explicit WeakVHWithLevel() = default;
+  WeakVHWithLevel(Value *V) : CallbackVH(V){};
+  /// Restart children analysis each time the value is replaced by a new
+  /// instruction.
+  void allUsesReplacedWith(Value *New) override {
+    setValPtr(New);
+    Level = 0;
+    IsInitial = true;
+  }
+  /// Check if the instruction was not deleted during vectorization.
+  bool isValid() const { return !getValPtr(); }
+  /// Must the instruction itself be vectorized?
+  bool isInitial() const { return IsInitial; }
+  /// Try to vectorize children.
+  void clearInitial() { IsInitial = false; }
+  /// Are all children processed already?
+  bool isFinal() const {
+    assert(getValPtr() &&
+           (isa<Instruction>(getValPtr()) &&
+            cast<Instruction>(getValPtr())->getNumOperands() >= Level));
+    return getValPtr() &&
+           cast<Instruction>(getValPtr())->getNumOperands() == Level;
+  }
+  /// Get next child operation.
+  Value *nextOperand() {
+    assert(getValPtr() && isa<Instruction>(getValPtr()) &&
+           cast<Instruction>(getValPtr())->getNumOperands() > Level);
+    return cast<Instruction>(getValPtr())->getOperand(Level++);
+  }
+  virtual ~WeakVHWithLevel() = default;
+};
+} // namespace
+
 /// \brief Attempt to reduce a horizontal reduction.
 /// If it is legal to match a horizontal reduction feeding
-/// the phi node P with reduction operators BI, then check if it
-/// can be done.
+/// the phi node P with the reduction operation rooted at Root in basic block
+/// BB, then check if it can be done.
 /// \returns true if a horizontal reduction was matched and reduced.
 /// \returns false if a horizontal reduction was not matched.
-static bool canMatchHorizontalReduction(PHINode *P, BinaryOperator *BI,
-                                        BoUpSLP &R, TargetTransformInfo *TTI,
-                                        unsigned MinRegSize) {
+static bool canBeVectorized(
+    PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R,
+    TargetTransformInfo *TTI,
+    const function_ref<bool(BinaryOperator *, BoUpSLP &)> Vectorize) {
   if (!ShouldVectorizeHor)
    return false;
 
-  HorizontalReduction HorRdx(MinRegSize);
-  if (!HorRdx.matchAssociativeReduction(P, BI))
+  if (!Root)
    return false;
 
-  // If there is a sufficient number of reduction values, reduce
-  // to a nearby power-of-2. Can safely generate oversized
-  // vectors and rely on the backend to split them to legal sizes.
-  HorRdx.ReduxWidth =
-      std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));
+  if (Root->getParent() != BB)
+    return false;
+  SmallVector<WeakVHWithLevel, 8> Stack(1, Root);
+  SmallSet<Value *, 8> VisitedInstrs;
+  bool Res = false;
+  while (!Stack.empty()) {
+    Value *V = Stack.back();
+    if (!V) {
+      Stack.pop_back();
+      continue;
+    }
+    auto *Inst = dyn_cast<Instruction>(V);
+    if (!Inst || isa<PHINode>(Inst)) {
+      Stack.pop_back();
+      continue;
+    }
+    if (Stack.back().isInitial()) {
+      Stack.back().clearInitial();
+      if (auto *BI = dyn_cast<BinaryOperator>(Inst)) {
+        HorizontalReduction HorRdx(R.getMinVecRegSize());
+        if (HorRdx.matchAssociativeReduction(P, BI)) {
+          // If there is a sufficient number of reduction values, reduce
+          // to a nearby power-of-2. Can safely generate oversized
+          // vectors and rely on the backend to split them to legal sizes.
+          HorRdx.ReduxWidth =
+              std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));
+
+          if (HorRdx.tryToReduce(R, TTI)) {
+            Res = true;
+            P = nullptr;
+            continue;
+          }
+        }
+        if (P) {
+          Inst = dyn_cast<Instruction>(BI->getOperand(0));
+          if (Inst == P)
+            Inst = dyn_cast<Instruction>(BI->getOperand(1));
+          if (!Inst) {
+            P = nullptr;
+            continue;
+          }
+        }
+      }
+      P = nullptr;
+      if (Vectorize(dyn_cast<BinaryOperator>(Inst), R)) {
+        Res = true;
+        continue;
+      }
+    }
+    if (Stack.back().isFinal()) {
+      Stack.pop_back();
+      continue;
+    }
 
-  return HorRdx.tryToReduce(R, TTI);
+    if (auto *NextV = dyn_cast<Instruction>(Stack.back().nextOperand()))
+      if (NextV->getParent() == BB && VisitedInstrs.insert(NextV).second &&
+          Stack.size() < RecursionMaxDepth)
+        Stack.push_back(NextV);
+  }
+  return Res;
+}
+
+bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
+                                                 BasicBlock *BB, BoUpSLP &R,
+                                                 TargetTransformInfo *TTI) {
+  if (!V)
+    return false;
+  auto *I = dyn_cast<Instruction>(V);
+  if (!I)
+    return false;
+
+  if (!isa<BinaryOperator>(I))
+    P = nullptr;
+  // Try to match and vectorize a horizontal reduction.
+  return canBeVectorized(P, I, BB, R, TTI,
+                         [this](BinaryOperator *BI, BoUpSLP &R) -> bool {
+                           return tryToVectorize(BI, R);
+                         });
 }
 
 bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
@@ -4459,65 +4721,42 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
      if (P->getNumIncomingValues() != 2)
        return Changed;
 
-      Value *Rdx = getReductionValue(DT, P, BB, LI);
-
-      // Check if this is a Binary Operator.
-      BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
-      if (!BI)
-        continue;
-
      // Try to match and vectorize a horizontal reduction.
-      if (canMatchHorizontalReduction(P, BI, R, TTI, R.getMinVecRegSize())) {
-        Changed = true;
-        it = BB->begin();
-        e = BB->end();
-        continue;
-      }
-
-      Value *Inst = BI->getOperand(0);
-      if (Inst == P)
-        Inst = BI->getOperand(1);
-
-      if (tryToVectorize(dyn_cast<BinaryOperator>(Inst), R)) {
-        // We would like to start over since some instructions are deleted
-        // and the iterator may become invalid value.
+      if (vectorizeRootInstruction(P, getReductionValue(DT, P, BB, LI), BB, R,
+                                   TTI)) {
        Changed = true;
        it = BB->begin();
        e = BB->end();
        continue;
      }
-
      continue;
    }
 
-    if (ShouldStartVectorizeHorAtStore)
-      if (StoreInst *SI = dyn_cast<StoreInst>(it))
-        if (BinaryOperator *BinOp =
-                dyn_cast<BinaryOperator>(SI->getValueOperand())) {
-          if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI,
-                                          R.getMinVecRegSize()) ||
-              tryToVectorize(BinOp, R)) {
-            Changed = true;
-            it = BB->begin();
-            e = BB->end();
-            continue;
-          }
+    if (ShouldStartVectorizeHorAtStore) {
+      if (StoreInst *SI = dyn_cast<StoreInst>(it)) {
+        // Try to match and vectorize a horizontal reduction.
+        if (vectorizeRootInstruction(nullptr, SI->getValueOperand(), BB, R,
+                                     TTI)) {
+          Changed = true;
+          it = BB->begin();
+          e = BB->end();
+          continue;
        }
+      }
+    }
 
    // Try to vectorize horizontal reductions feeding into a return.
-    if (ReturnInst *RI = dyn_cast<ReturnInst>(it))
-      if (RI->getNumOperands() != 0)
-        if (BinaryOperator *BinOp =
-                dyn_cast<BinaryOperator>(RI->getOperand(0))) {
-          DEBUG(dbgs() << "SLP: Found a return to vectorize.\n");
-          if (tryToVectorizePair(BinOp->getOperand(0),
-                                 BinOp->getOperand(1), R)) {
-            Changed = true;
-            it = BB->begin();
-            e = BB->end();
-            continue;
-          }
+    if (ReturnInst *RI = dyn_cast<ReturnInst>(it)) {
+      if (RI->getNumOperands() != 0) {
+        // Try to match and vectorize a horizontal reduction.
+        if (vectorizeRootInstruction(nullptr, RI->getOperand(0), BB, R, TTI)) {
+          Changed = true;
+          it = BB->begin();
+          e = BB->end();
+          continue;
        }
+      }
+    }
 
    // Try to vectorize trees that start at compare instructions.
    if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
@@ -4530,16 +4769,14 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
        continue;
      }
 
-      for (int i = 0; i < 2; ++i) {
-        if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) {
-          if (tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R)) {
-            Changed = true;
-            // We would like to start over since some instructions are deleted
-            // and the iterator may become invalid value.
-            it = BB->begin();
-            e = BB->end();
-            break;
-          }
+      for (int I = 0; I < 2; ++I) {
+        if (vectorizeRootInstruction(nullptr, CI->getOperand(I), BB, R, TTI)) {
+          Changed = true;
+          // We would like to start over since some instructions are deleted
+          // and the iterator may become invalid value.
+          it = BB->begin();
+          e = BB->end();
+          break;
        }
      }
      continue;
    }
@@ -4690,8 +4927,7 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
    // may cause a significant compile-time increase.
    for (unsigned CI = 0, CE = it->second.size(); CI < CE; CI+=16) {
      unsigned Len = std::min<unsigned>(CE - CI, 16);
-      Changed |= vectorizeStores(makeArrayRef(&it->second[CI], Len),
-                                 -SLPCostThreshold, R);
+      Changed |= vectorizeStores(makeArrayRef(&it->second[CI], Len), R);
    }
   }
   return Changed;
```
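The propagateIRFlags() change widens flag propagation from BinaryOperator to any Instruction, but the rule itself is unchanged: the vector instruction may only keep the IR flags common to every scalar lane it replaces. A minimal standalone sketch of that intersection rule, with made-up flag bits standing in for LLVM's nsw/nuw/exact:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Invented flag bits; in LLVM these would be the wrapping/exactness flags.
enum : unsigned { NSW = 1u, NUW = 2u, Exact = 4u };

// Mirror of the propagateIRFlags() rule: intersect (bitwise AND) the flags
// of all scalar lanes -- the analogue of repeated andIRFlags() calls.
unsigned intersectFlags(const std::vector<unsigned> &LaneFlags) {
  unsigned Intersection = LaneFlags[0];
  for (std::size_t I = 1, E = LaneFlags.size(); I < E; ++I)
    Intersection &= LaneFlags[I];
  return Intersection;
}

int main() {
  // The middle lane lacks NUW and Exact, so the vector op keeps only NSW.
  assert(intersectFlags({NSW | NUW, NSW, NSW | NUW | Exact}) == NSW);
}
```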
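The reworked load handling in buildTree_rec() first scans the bundle forward and only falls back to a full reverse re-check when no pair was consecutive in order (the forward scan may exit early, so the reverse flag must be verified). A compilable sketch of that classification, using plain integer addresses as a stand-in for isConsecutiveAccess():

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for isConsecutiveAccess(): here, "consecutive"
// simply means the second address immediately follows the first.
static bool isConsecutive(std::int64_t A, std::int64_t B) { return B == A + 1; }

enum class BundleOrder { Consecutive, ReverseConsecutive, Neither };

static BundleOrder classify(const std::vector<std::int64_t> &Addrs) {
  assert(Addrs.size() >= 2 && "need at least a pair of loads");
  bool Consecutive = true;
  bool ReverseConsecutive = true;
  for (std::size_t i = 0, e = Addrs.size() - 1; i < e; ++i) {
    if (!isConsecutive(Addrs[i], Addrs[i + 1])) {
      Consecutive = false;
      break;
    }
    // A pair that is consecutive forward cannot be consecutive backward.
    ReverseConsecutive = false;
  }
  if (Consecutive)
    return BundleOrder::Consecutive;
  // The forward scan may have stopped early, so verify the whole reverse
  // chain before claiming the loads are consecutive when reversed.
  if (ReverseConsecutive)
    for (std::size_t i = Addrs.size() - 1; i > 0; --i)
      if (!isConsecutive(Addrs[i], Addrs[i - 1])) {
        ReverseConsecutive = false;
        break;
      }
  return ReverseConsecutive ? BundleOrder::ReverseConsecutive
                            : BundleOrder::Neither;
}

int main() {
  assert(classify({10, 11, 12, 13}) == BundleOrder::Consecutive);
  assert(classify({13, 12, 11, 10}) == BundleOrder::ReverseConsecutive);
  assert(classify({10, 12, 11, 13}) == BundleOrder::Neither);
}
```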
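MinBWs now maps each demoted scalar to a (width, signed) pair so extraction can choose between sext and zext. A simplified model of the width computation on int32_t values — numSignBits here is a crude stand-in for LLVM's ComputeNumSignBits, and the "+1" mirrors the extra bit reserved when the sign bit is unknown:

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Number of leading bits equal to the sign bit (a rough analogue of
// ComputeNumSignBits): leading zeros for non-negative values, leading ones
// for negative ones.
static unsigned numSignBits(std::int32_t V) {
  std::uint32_t U = static_cast<std::uint32_t>(V);
  if (V < 0)
    U = ~U; // count leading ones via the complement
  unsigned N = 0;
  for (int Bit = 31; Bit >= 0 && !(U & (1u << Bit)); --Bit)
    ++N;
  return N;
}

struct MinBW {
  unsigned Width;
  bool NeedsSExt; // true: sign-extend back; false: zero-extend back
};

static MinBW computeMinBW(const std::vector<std::int32_t> &Roots) {
  // IsKnownPositive analogue: can we prove the sign bit is zero everywhere?
  bool IsKnownPositive = std::all_of(Roots.begin(), Roots.end(),
                                     [](std::int32_t V) { return V >= 0; });
  unsigned MaxBitWidth = 1;
  for (std::int32_t V : Roots)
    MaxBitWidth = std::max(MaxBitWidth, 32 - numSignBits(V));
  // Unknown sign bit: keep one extra bit so sign-extending from the narrow
  // type reproduces the original value.
  if (!IsKnownPositive)
    ++MaxBitWidth;
  // Round up to the next power of two, as the pass does (minimum i8 here).
  unsigned W = 8;
  while (W < MaxBitWidth)
    W *= 2;
  return {W, !IsKnownPositive};
}

int main() {
  MinBW A = computeMinBW({3, 7, 100});
  assert(A.Width == 8 && !A.NeedsSExt); // all positive -> zero-extend
  MinBW B = computeMinBW({3, -7});
  assert(B.Width == 8 && B.NeedsSExt);  // unknown sign -> extra bit + sext
}
```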
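tryToVectorizeList() now searches vectorization factors from the largest power of two that fits the list down to MinVF, instead of a single register-derived VF. A toy model of that search order — tryBundle stands in for the build-tree-and-cost check, and its profitability rule is invented purely for the demo:

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

// Pretend only width-4 bundles are profitable, to show the VF descent.
static bool tryBundle(const std::vector<int> &Ops) { return Ops.size() == 4; }

static unsigned powerOf2Floor(unsigned N) {
  unsigned P = 1;
  while (P * 2 <= N)
    P *= 2;
  return P;
}

int main() {
  std::vector<int> VL(6, 0); // six scalar roots
  unsigned MinVF = 2;
  unsigned MaxVF = std::max(powerOf2Floor(static_cast<unsigned>(VL.size())),
                            MinVF); // 4 here

  unsigned NextInst = 0, MaxInst = static_cast<unsigned>(VL.size());
  for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /= 2) {
    for (unsigned I = NextInst; I < MaxInst; ++I) {
      unsigned OpsWidth = (I + VF > MaxInst) ? MaxInst - I : VF;
      if (OpsWidth < 2 || (OpsWidth & (OpsWidth - 1)) != 0)
        break; // tail is not a power of two wide
      std::vector<int> Ops(VL.begin() + I, VL.begin() + I + OpsWidth);
      if (tryBundle(Ops)) {
        std::printf("vectorized %u scalars at offset %u (VF=%u)\n",
                    OpsWidth, I, VF);
        I += VF - 1; // skip past the vectorized bundle
        NextInst = I + 1;
      }
    }
  }
}
```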
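The reduction cost fix charges ReduxWidth - 1 scalar operations (reducing N values takes N - 1 additions) against the scalar type, where the old formula charged ReduxWidth operations at the vector type's cost. A small worked comparison with assumed unit costs (the cost values are made up, not from any target model):

```cpp
#include <cassert>

int main() {
  unsigned ReduxWidth = 8;    // number of scalars being reduced
  unsigned ScalarAddCost = 1; // assumed cost of one scalar add
  unsigned VecStepCost = 2;   // assumed cost of one shuffle+add step

  // Reducing N values needs N - 1 scalar additions -- the corrected formula.
  unsigned ScalarReduxCost = (ReduxWidth - 1) * ScalarAddCost;

  // The splitting (log2) vector reduction halves the vector each step.
  unsigned VecReduxCost = 0;
  for (unsigned W = ReduxWidth; W > 1; W /= 2)
    VecReduxCost += VecStepCost;

  assert(ScalarReduxCost == 7); // not 8: the old formula overcounted
  assert(VecReduxCost == 6);    // three halving steps at cost 2 each
}
```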
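Finally, canBeVectorized() walks the expression from the root with an explicit stack of (value, operand-index) frames — the WeakVHWithLevel entries — instead of recursion, so a value replaced mid-vectorization can restart its analysis cleanly. A simplified, value-handle-free sketch of that traversal over a toy node graph (all names here are illustrative):

```cpp
#include <cstdio>
#include <unordered_set>
#include <vector>

struct Node {
  const char *Name;
  std::vector<Node *> Operands;
};

// Each frame pairs a node with the index of its next unvisited operand,
// like WeakVHWithLevel's Level field; RAUW tracking is omitted.
struct Frame {
  explicit Frame(Node *N) : N(N) {}
  Node *N;
  unsigned Level = 0;    // next operand to expand
  bool IsInitial = true; // has the node itself been offered for vectorization?
};

static bool tryVectorize(Node *) { return false; } // stand-in: never profitable

static void traverse(Node *Root, unsigned MaxDepth) {
  std::vector<Frame> Stack;
  Stack.emplace_back(Root);
  std::unordered_set<Node *> Visited;
  while (!Stack.empty()) {
    Frame &F = Stack.back();
    if (F.IsInitial) {
      F.IsInitial = false;
      std::printf("visiting %s\n", F.N->Name);
      if (tryVectorize(F.N))
        continue; // success: no need to expand this node's children now
    }
    if (F.Level == F.N->Operands.size()) {
      Stack.pop_back(); // isFinal(): all children processed
      continue;
    }
    Node *Next = F.N->Operands[F.Level++]; // nextOperand() analogue
    // F is not used after this push, so reallocation of Stack is safe.
    if (Next && Visited.insert(Next).second && Stack.size() < MaxDepth)
      Stack.emplace_back(Next);
  }
}

int main() {
  Node C{"c", {}}, B{"b", {&C}}, A{"a", {&B, &C}};
  traverse(&A, 8); // prints: a, b, c -- c is expanded only once
}
```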