diff options
Diffstat (limited to 'lib/Transforms/Vectorize')
-rw-r--r-- | lib/Transforms/Vectorize/LoadStoreVectorizer.cpp | 32 | ||||
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 27 |
2 files changed, 43 insertions, 16 deletions
diff --git a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index 4409d7a404f8..97dcb40a1d72 100644 --- a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/Value.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Vectorize.h" @@ -65,7 +66,9 @@ public: bool run(); private: - Value *getPointerOperand(Value *I); + Value *getPointerOperand(Value *I) const; + + GetElementPtrInst *getSourceGEP(Value *Src) const; unsigned getPointerAddressSpace(Value *I); @@ -215,7 +218,7 @@ bool Vectorizer::run() { return Changed; } -Value *Vectorizer::getPointerOperand(Value *I) { +Value *Vectorizer::getPointerOperand(Value *I) const { if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand(); if (StoreInst *SI = dyn_cast<StoreInst>(I)) @@ -231,6 +234,19 @@ unsigned Vectorizer::getPointerAddressSpace(Value *I) { return -1; } +GetElementPtrInst *Vectorizer::getSourceGEP(Value *Src) const { + // First strip pointer bitcasts. Make sure pointee size is the same with + // and without casts. + // TODO: a stride set by the add instruction below can match the difference + // in pointee type size here. Currently it will not be vectorized. + Value *SrcPtr = getPointerOperand(Src); + Value *SrcBase = SrcPtr->stripPointerCasts(); + if (DL.getTypeStoreSize(SrcPtr->getType()->getPointerElementType()) == + DL.getTypeStoreSize(SrcBase->getType()->getPointerElementType())) + SrcPtr = SrcBase; + return dyn_cast<GetElementPtrInst>(SrcPtr); +} + // FIXME: Merge with llvm::isConsecutiveAccess bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) { Value *PtrA = getPointerOperand(A); @@ -283,8 +299,8 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) { // Look through GEPs after checking they're the same except for the last // index. - GetElementPtrInst *GEPA = dyn_cast<GetElementPtrInst>(getPointerOperand(A)); - GetElementPtrInst *GEPB = dyn_cast<GetElementPtrInst>(getPointerOperand(B)); + GetElementPtrInst *GEPA = getSourceGEP(A); + GetElementPtrInst *GEPB = getSourceGEP(B); if (!GEPA || !GEPB || GEPA->getNumOperands() != GEPB->getNumOperands()) return false; unsigned FinalIndex = GEPA->getNumOperands() - 1; @@ -328,11 +344,9 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) { // If any bits are known to be zero other than the sign bit in OpA, we can // add 1 to it while guaranteeing no overflow of any sort. if (!Safe) { - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(OpA, KnownZero, KnownOne, DL, 0, nullptr, OpA, &DT); - KnownZero &= ~APInt::getHighBitsSet(BitWidth, 1); - if (KnownZero != 0) + KnownBits Known(BitWidth); + computeKnownBits(OpA, Known, DL, 0, nullptr, OpA, &DT); + if (Known.Zero.countTrailingZeros() < (BitWidth - 1)) Safe = true; } diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 7eb8fabe0b2f..87ce0194dad6 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3586,8 +3586,12 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, IRBuilder<> B(MiddleBlock->getTerminator()); Value *CountMinusOne = B.CreateSub( CountRoundDown, ConstantInt::get(CountRoundDown->getType(), 1)); - Value *CMO = B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType(), - "cast.cmo"); + Value *CMO = + !II.getStep()->getType()->isIntegerTy() + ? B.CreateCast(Instruction::SIToFP, CountMinusOne, + II.getStep()->getType()) + : B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType()); + CMO->setName("cast.cmo"); Value *Escape = II.transform(B, CMO, PSE.getSE(), DL); Escape->setName("ind.escape"); MissingVals[UI] = Escape; @@ -4516,14 +4520,15 @@ void InnerLoopVectorizer::predicateInstructions() { for (auto KV : PredicatedInstructions) { BasicBlock::iterator I(KV.first); BasicBlock *Head = I->getParent(); - auto *BB = SplitBlock(Head, &*std::next(I), DT, LI); auto *T = SplitBlockAndInsertIfThen(KV.second, &*I, /*Unreachable=*/false, /*BranchWeights=*/nullptr, DT, LI); I->moveBefore(T); sinkScalarOperands(&*I); - I->getParent()->setName(Twine("pred.") + I->getOpcodeName() + ".if"); - BB->setName(Twine("pred.") + I->getOpcodeName() + ".continue"); + BasicBlock *PredicatedBlock = I->getParent(); + Twine BBNamePrefix = Twine("pred.") + I->getOpcodeName(); + PredicatedBlock->setName(BBNamePrefix + ".if"); + PredicatedBlock->getSingleSuccessor()->setName(BBNamePrefix + ".continue"); // If the instruction is non-void create a Phi node at reconvergence point. if (!I->getType()->isVoidTy()) { @@ -7324,8 +7329,16 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, return TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector, VectorTy, VF - 1, VectorTy); - // TODO: IF-converted IFs become selects. - return 0; + // Phi nodes in non-header blocks (not inductions, reductions, etc.) are + // converted into select instructions. We require N - 1 selects per phi + // node, where N is the number of incoming values. + if (VF > 1 && Phi->getParent() != TheLoop->getHeader()) + return (Phi->getNumIncomingValues() - 1) * + TTI.getCmpSelInstrCost( + Instruction::Select, ToVectorTy(Phi->getType(), VF), + ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF)); + + return TTI.getCFInstrCost(Instruction::PHI); } case Instruction::UDiv: case Instruction::SDiv: |