Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 249
1 file changed, 173 insertions(+), 76 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 65aaef28d87a..71b7f279e5fa 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -18,6 +18,7 @@
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/KnownBits.h"
+#include <numeric>
 using namespace llvm;
 using namespace PatternMatch;
 
@@ -843,33 +844,33 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   return nullptr;
 }
 
-Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
+Instruction *InstCombiner::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
                                              bool DoTransform) {
   // If we are just checking for a icmp eq of a single bit and zext'ing it
   // to an integer, then shift the bit to the appropriate place and then
   // cast to integer to avoid the comparison.
   const APInt *Op1CV;
-  if (match(ICI->getOperand(1), m_APInt(Op1CV))) {
+  if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {
 
     // zext (x <s  0) to i32 --> x>>u31      true if signbit set.
     // zext (x >s -1) to i32 --> (x>>u31)^1  true if signbit clear.
-    if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isNullValue()) ||
-        (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnesValue())) {
-      if (!DoTransform) return ICI;
+    if ((Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isNullValue()) ||
+        (Cmp->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnesValue())) {
+      if (!DoTransform) return Cmp;
 
-      Value *In = ICI->getOperand(0);
+      Value *In = Cmp->getOperand(0);
       Value *Sh = ConstantInt::get(In->getType(),
                                    In->getType()->getScalarSizeInBits() - 1);
       In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
-      if (In->getType() != CI.getType())
-        In = Builder.CreateIntCast(In, CI.getType(), false /*ZExt*/);
+      if (In->getType() != Zext.getType())
+        In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);
 
-      if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
+      if (Cmp->getPredicate() == ICmpInst::ICMP_SGT) {
         Constant *One = ConstantInt::get(In->getType(), 1);
         In = Builder.CreateXor(In, One, In->getName() + ".not");
       }
 
-      return replaceInstUsesWith(CI, In);
+      return replaceInstUsesWith(Zext, In);
     }
 
     // zext (X == 0) to i32 --> X^1      iff X has only the low bit set.
@@ -882,24 +883,24 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
     // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
     if ((Op1CV->isNullValue() || Op1CV->isPowerOf2()) &&
         // This only works for EQ and NE
-        ICI->isEquality()) {
+        Cmp->isEquality()) {
       // If Op1C some other power of two, convert:
-      KnownBits Known = computeKnownBits(ICI->getOperand(0), 0, &CI);
+      KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);
 
       APInt KnownZeroMask(~Known.Zero);
       if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
-        if (!DoTransform) return ICI;
+        if (!DoTransform) return Cmp;
 
-        bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
+        bool isNE = Cmp->getPredicate() == ICmpInst::ICMP_NE;
         if (!Op1CV->isNullValue() && (*Op1CV != KnownZeroMask)) {
           // (X&4) == 2 --> false
           // (X&4) != 2 --> true
-          Constant *Res = ConstantInt::get(CI.getType(), isNE);
-          return replaceInstUsesWith(CI, Res);
+          Constant *Res = ConstantInt::get(Zext.getType(), isNE);
+          return replaceInstUsesWith(Zext, Res);
         }
 
         uint32_t ShAmt = KnownZeroMask.logBase2();
-        Value *In = ICI->getOperand(0);
+        Value *In = Cmp->getOperand(0);
         if (ShAmt) {
           // Perform a logical shr by shiftamt.
           // Insert the shift to put the result in the low bit.
@@ -912,11 +913,11 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
           In = Builder.CreateXor(In, One);
         }
 
-        if (CI.getType() == In->getType())
-          return replaceInstUsesWith(CI, In);
+        if (Zext.getType() == In->getType())
+          return replaceInstUsesWith(Zext, In);
 
-        Value *IntCast = Builder.CreateIntCast(In, CI.getType(), false);
-        return replaceInstUsesWith(CI, IntCast);
+        Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
+        return replaceInstUsesWith(Zext, IntCast);
       }
     }
   }
@@ -924,19 +925,19 @@
   // icmp ne A, B is equal to xor A, B when A and B only really have one bit.
   // It is also profitable to transform icmp eq into not(xor(A, B)) because that
   // may lead to additional simplifications.
-  if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) {
-    if (IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) {
-      Value *LHS = ICI->getOperand(0);
-      Value *RHS = ICI->getOperand(1);
+  if (Cmp->isEquality() && Zext.getType() == Cmp->getOperand(0)->getType()) {
+    if (IntegerType *ITy = dyn_cast<IntegerType>(Zext.getType())) {
+      Value *LHS = Cmp->getOperand(0);
+      Value *RHS = Cmp->getOperand(1);
 
-      KnownBits KnownLHS = computeKnownBits(LHS, 0, &CI);
-      KnownBits KnownRHS = computeKnownBits(RHS, 0, &CI);
+      KnownBits KnownLHS = computeKnownBits(LHS, 0, &Zext);
+      KnownBits KnownRHS = computeKnownBits(RHS, 0, &Zext);
 
       if (KnownLHS.Zero == KnownRHS.Zero && KnownLHS.One == KnownRHS.One) {
         APInt KnownBits = KnownLHS.Zero | KnownLHS.One;
         APInt UnknownBit = ~KnownBits;
         if (UnknownBit.countPopulation() == 1) {
-          if (!DoTransform) return ICI;
+          if (!DoTransform) return Cmp;
 
           Value *Result = Builder.CreateXor(LHS, RHS);
 
@@ -949,10 +950,10 @@
           Result = Builder.CreateLShr(
                Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));
 
-          if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+          if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
             Result = Builder.CreateXor(Result, ConstantInt::get(ITy, 1));
-          Result->takeName(ICI);
-          return replaceInstUsesWith(CI, Result);
+          Result->takeName(Cmp);
+          return replaceInstUsesWith(Zext, Result);
         }
       }
     }
@@ -1172,8 +1173,8 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
     }
   }
 
-  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
-    return transformZExtICmp(ICI, CI);
+  if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Src))
+    return transformZExtICmp(Cmp, CI);
 
   BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
   if (SrcI && SrcI->getOpcode() == Instruction::Or) {
@@ -1188,7 +1189,9 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
       // zext (or icmp, icmp) -> or (zext icmp), (zext icmp)
      Value *LCast = Builder.CreateZExt(LHS, CI.getType(), LHS->getName());
      Value *RCast = Builder.CreateZExt(RHS, CI.getType(), RHS->getName());
-      BinaryOperator *Or = BinaryOperator::Create(Instruction::Or, LCast, RCast);
+      Value *Or = Builder.CreateOr(LCast, RCast, CI.getName());
+      if (auto *OrInst = dyn_cast<Instruction>(Or))
+        Builder.SetInsertPoint(OrInst);
 
       // Perform the elimination.
       if (auto *LZExt = dyn_cast<ZExtInst>(LCast))
@@ -1196,7 +1199,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
       if (auto *RZExt = dyn_cast<ZExtInst>(RCast))
         transformZExtICmp(RHS, *RZExt);
 
-      return Or;
+      return replaceInstUsesWith(CI, Or);
     }
   }
 
@@ -1621,6 +1624,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
   Value *X;
   Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
   if (Op && Op->hasOneUse()) {
+    // FIXME: The FMF should propagate from the fptrunc, not the source op.
+    IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+    if (isa<FPMathOperator>(Op))
+      Builder.setFastMathFlags(Op->getFastMathFlags());
+
     if (match(Op, m_FNeg(m_Value(X)))) {
       Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
 
@@ -1630,6 +1638,24 @@
         return BinaryOperator::CreateFNegFMF(InnerTrunc, Op);
       return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
     }
+
+    // If we are truncating a select that has an extended operand, we can
+    // narrow the other operand and do the select as a narrow op.
+    Value *Cond, *X, *Y;
+    if (match(Op, m_Select(m_Value(Cond), m_FPExt(m_Value(X)), m_Value(Y))) &&
+        X->getType() == Ty) {
+      // fptrunc (select Cond, (fpext X), Y --> select Cond, X, (fptrunc Y)
+      Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
+      Value *Sel = Builder.CreateSelect(Cond, X, NarrowY, "narrow.sel", Op);
+      return replaceInstUsesWith(FPT, Sel);
+    }
+    if (match(Op, m_Select(m_Value(Cond), m_Value(Y), m_FPExt(m_Value(X)))) &&
+        X->getType() == Ty) {
+      // fptrunc (select Cond, Y, (fpext X) --> select Cond, (fptrunc Y), X
+      Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
+      Value *Sel = Builder.CreateSelect(Cond, NarrowY, X, "narrow.sel", Op);
+      return replaceInstUsesWith(FPT, Sel);
+    }
   }
 
   if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
@@ -1808,7 +1834,7 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
   Type *Ty = CI.getType();
   unsigned AS = CI.getPointerAddressSpace();
 
-  if (Ty->getScalarSizeInBits() == DL.getIndexSizeInBits(AS))
+  if (Ty->getScalarSizeInBits() == DL.getPointerSizeInBits(AS))
     return commonPointerCastTransforms(CI);
 
   Type *PtrTy = DL.getIntPtrType(CI.getContext(), AS);
@@ -1820,12 +1846,24 @@
 }
 
 /// This input value (which is known to have vector type) is being zero extended
-/// or truncated to the specified vector type.
+/// or truncated to the specified vector type. Since the zext/trunc is done
+/// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
+/// endianness will impact which end of the vector that is extended or
+/// truncated.
+///
+/// A vector is always stored with index 0 at the lowest address, which
+/// corresponds to the most significant bits for a big endian stored integer and
+/// the least significant bits for little endian. A trunc/zext of an integer
+/// impacts the big end of the integer. Thus, we need to add/remove elements at
+/// the front of the vector for big endian targets, and the back of the vector
+/// for little endian targets.
+///
 /// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
 ///
 /// The source and destination vector types may have different element types.
-static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy,
-                                         InstCombiner &IC) {
+static Instruction *optimizeVectorResizeWithIntegerBitCasts(Value *InVal,
+                                                            VectorType *DestTy,
+                                                            InstCombiner &IC) {
   // We can only do this optimization if the output is a multiple of the input
   // element size, or the input is a multiple of the output element size.
   // Convert the input type to have the same element type as the output.
@@ -1844,31 +1882,53 @@ static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy,
     InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
   }
 
+  bool IsBigEndian = IC.getDataLayout().isBigEndian();
+  unsigned SrcElts = SrcTy->getNumElements();
+  unsigned DestElts = DestTy->getNumElements();
+
+  assert(SrcElts != DestElts && "Element counts should be different.");
+
   // Now that the element types match, get the shuffle mask and RHS of the
   // shuffle to use, which depends on whether we're increasing or decreasing the
   // size of the input.
-  SmallVector<uint32_t, 16> ShuffleMask;
+  SmallVector<uint32_t, 16> ShuffleMaskStorage;
+  ArrayRef<uint32_t> ShuffleMask;
   Value *V2;
 
-  if (SrcTy->getNumElements() > DestTy->getNumElements()) {
-    // If we're shrinking the number of elements, just shuffle in the low
-    // elements from the input and use undef as the second shuffle input.
-    V2 = UndefValue::get(SrcTy);
-    for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
-      ShuffleMask.push_back(i);
+  // Produce an identify shuffle mask for the src vector.
+  ShuffleMaskStorage.resize(SrcElts);
+  std::iota(ShuffleMaskStorage.begin(), ShuffleMaskStorage.end(), 0);
 
+  if (SrcElts > DestElts) {
+    // If we're shrinking the number of elements (rewriting an integer
+    // truncate), just shuffle in the elements corresponding to the least
+    // significant bits from the input and use undef as the second shuffle
+    // input.
+    V2 = UndefValue::get(SrcTy);
+    // Make sure the shuffle mask selects the "least significant bits" by
+    // keeping elements from back of the src vector for big endian, and from the
+    // front for little endian.
+    ShuffleMask = ShuffleMaskStorage;
+    if (IsBigEndian)
+      ShuffleMask = ShuffleMask.take_back(DestElts);
+    else
+      ShuffleMask = ShuffleMask.take_front(DestElts);
  } else {
-    // If we're increasing the number of elements, shuffle in all of the
-    // elements from InVal and fill the rest of the result elements with zeros
-    // from a constant zero.
+    // If we're increasing the number of elements (rewriting an integer zext),
+    // shuffle in all of the elements from InVal. Fill the rest of the result
+    // elements with zeros from a constant zero.
     V2 = Constant::getNullValue(SrcTy);
-    unsigned SrcElts = SrcTy->getNumElements();
-    for (unsigned i = 0, e = SrcElts; i != e; ++i)
-      ShuffleMask.push_back(i);
-
-    // The excess elements reference the first element of the zero input.
-    for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i)
-      ShuffleMask.push_back(SrcElts);
+    // Use first elt from V2 when indicating zero in the shuffle mask.
+    uint32_t NullElt = SrcElts;
+    // Extend with null values in the "most significant bits" by adding elements
+    // in front of the src vector for big endian, and at the back for little
+    // endian.
+    unsigned DeltaElts = DestElts - SrcElts;
+    if (IsBigEndian)
+      ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
+    else
+      ShuffleMaskStorage.append(DeltaElts, NullElt);
+    ShuffleMask = ShuffleMaskStorage;
  }
 
   return new ShuffleVectorInst(InVal, V2,
@@ -2217,6 +2277,31 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
     }
   }
 
+  // Check that each user of each old PHI node is something that we can
+  // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
+  for (auto *OldPN : OldPhiNodes) {
+    for (User *V : OldPN->users()) {
+      if (auto *SI = dyn_cast<StoreInst>(V)) {
+        if (!SI->isSimple() || SI->getOperand(0) != OldPN)
+          return nullptr;
+      } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
+        // Verify it's a B->A cast.
+        Type *TyB = BCI->getOperand(0)->getType();
+        Type *TyA = BCI->getType();
+        if (TyA != DestTy || TyB != SrcTy)
+          return nullptr;
+      } else if (auto *PHI = dyn_cast<PHINode>(V)) {
+        // As long as the user is another old PHI node, then even if we don't
+        // rewrite it, the PHI web we're considering won't have any users
+        // outside itself, so it'll be dead.
+        if (OldPhiNodes.count(PHI) == 0)
+          return nullptr;
+      } else {
+        return nullptr;
+      }
+    }
+  }
+
   // For each old PHI node, create a corresponding new PHI node with a type A.
   SmallDenseMap<PHINode *, PHINode *> NewPNodes;
   for (auto *OldPN : OldPhiNodes) {
@@ -2234,9 +2319,14 @@
       if (auto *C = dyn_cast<Constant>(V)) {
         NewV = ConstantExpr::getBitCast(C, DestTy);
       } else if (auto *LI = dyn_cast<LoadInst>(V)) {
-        Builder.SetInsertPoint(LI->getNextNode());
-        NewV = Builder.CreateBitCast(LI, DestTy);
-        Worklist.Add(LI);
+        // Explicitly perform load combine to make sure no opposing transform
+        // can remove the bitcast in the meantime and trigger an infinite loop.
+        Builder.SetInsertPoint(LI);
+        NewV = combineLoadToNewType(*LI, DestTy);
+        // Remove the old load and its use in the old phi, which itself becomes
+        // dead once the whole transform finishes.
+        replaceInstUsesWith(*LI, UndefValue::get(LI->getType()));
+        eraseInstFromFunction(*LI);
       } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
         NewV = BCI->getOperand(0);
       } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
@@ -2259,26 +2349,33 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
   Instruction *RetVal = nullptr;
   for (auto *OldPN : OldPhiNodes) {
     PHINode *NewPN = NewPNodes[OldPN];
-    for (User *V : OldPN->users()) {
+    for (auto It = OldPN->user_begin(), End = OldPN->user_end(); It != End; ) {
+      User *V = *It;
+      // We may remove this user, advance to avoid iterator invalidation.
+      ++It;
       if (auto *SI = dyn_cast<StoreInst>(V)) {
-        if (SI->isSimple() && SI->getOperand(0) == OldPN) {
-          Builder.SetInsertPoint(SI);
-          auto *NewBC =
-            cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
-          SI->setOperand(0, NewBC);
-          Worklist.Add(SI);
-          assert(hasStoreUsersOnly(*NewBC));
-        }
+        assert(SI->isSimple() && SI->getOperand(0) == OldPN);
+        Builder.SetInsertPoint(SI);
+        auto *NewBC =
+          cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
+        SI->setOperand(0, NewBC);
+        Worklist.Add(SI);
+        assert(hasStoreUsersOnly(*NewBC));
      } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
-        // Verify it's a B->A cast.
         Type *TyB = BCI->getOperand(0)->getType();
         Type *TyA = BCI->getType();
-        if (TyA == DestTy && TyB == SrcTy) {
-          Instruction *I = replaceInstUsesWith(*BCI, NewPN);
-          if (BCI == &CI)
-            RetVal = I;
-        }
+        assert(TyA == DestTy && TyB == SrcTy);
+        (void) TyA;
+        (void) TyB;
+        Instruction *I = replaceInstUsesWith(*BCI, NewPN);
+        if (BCI == &CI)
+          RetVal = I;
+      } else if (auto *PHI = dyn_cast<PHINode>(V)) {
+        assert(OldPhiNodes.count(PHI) > 0);
+        (void) PHI;
+      } else {
+        llvm_unreachable("all uses should be handled");
      }
    }
  }
 
@@ -2374,8 +2471,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
     CastInst *SrcCast = cast<CastInst>(Src);
     if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
       if (isa<VectorType>(BCIn->getOperand(0)->getType()))
-        if (Instruction *I = optimizeVectorResize(BCIn->getOperand(0),
-                                                  cast<VectorType>(DestTy), *this))
+        if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
+                BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
           return I;
   }
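Illustration (not part of the commit): a minimal LLVM IR sketch of the new fptrunc-of-select narrowing added in visitFPTrunc above. The function and value names here are invented for the example; only "narrow.sel" comes from the CreateSelect call in the patch.

    ; Before: the select is performed in the wide type, then truncated.
    define float @narrow_sel_example(i1 %cond, float %x, double %y) {
      %x.ext = fpext float %x to double
      %sel = select i1 %cond, double %x.ext, double %y
      %r = fptrunc double %sel to float
      ret float %r
    }

    ; After the fold (fptrunc (select Cond, (fpext X), Y) --> select Cond, X, (fptrunc Y)),
    ; the fpext/fptrunc round trip on %x disappears and only %y is truncated:
    ;   %y.narrow   = fptrunc double %y to float
    ;   %narrow.sel = select i1 %cond, float %x, float %y.narrow
    ;   ret float %narrow.sel

The transform only fires when the select has a single use and the fpext source already has the destination type, so no extra instructions are created on either path.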