summaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp249
1 files changed, 173 insertions, 76 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 65aaef28d87a..71b7f279e5fa 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
+#include <numeric>
using namespace llvm;
using namespace PatternMatch;
@@ -843,33 +844,33 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
return nullptr;
}
-Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
+Instruction *InstCombiner::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
bool DoTransform) {
// If we are just checking for a icmp eq of a single bit and zext'ing it
// to an integer, then shift the bit to the appropriate place and then
// cast to integer to avoid the comparison.
const APInt *Op1CV;
- if (match(ICI->getOperand(1), m_APInt(Op1CV))) {
+ if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {
// zext (x <s 0) to i32 --> x>>u31 true if signbit set.
// zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
- if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isNullValue()) ||
- (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnesValue())) {
- if (!DoTransform) return ICI;
+ if ((Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isNullValue()) ||
+ (Cmp->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnesValue())) {
+ if (!DoTransform) return Cmp;
- Value *In = ICI->getOperand(0);
+ Value *In = Cmp->getOperand(0);
Value *Sh = ConstantInt::get(In->getType(),
In->getType()->getScalarSizeInBits() - 1);
In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
- if (In->getType() != CI.getType())
- In = Builder.CreateIntCast(In, CI.getType(), false /*ZExt*/);
+ if (In->getType() != Zext.getType())
+ In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);
- if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
+ if (Cmp->getPredicate() == ICmpInst::ICMP_SGT) {
Constant *One = ConstantInt::get(In->getType(), 1);
In = Builder.CreateXor(In, One, In->getName() + ".not");
}
- return replaceInstUsesWith(CI, In);
+ return replaceInstUsesWith(Zext, In);
}
// zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
@@ -882,24 +883,24 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
// zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
if ((Op1CV->isNullValue() || Op1CV->isPowerOf2()) &&
// This only works for EQ and NE
- ICI->isEquality()) {
+ Cmp->isEquality()) {
// If Op1C some other power of two, convert:
- KnownBits Known = computeKnownBits(ICI->getOperand(0), 0, &CI);
+ KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);
APInt KnownZeroMask(~Known.Zero);
if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
- if (!DoTransform) return ICI;
+ if (!DoTransform) return Cmp;
- bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
+ bool isNE = Cmp->getPredicate() == ICmpInst::ICMP_NE;
if (!Op1CV->isNullValue() && (*Op1CV != KnownZeroMask)) {
// (X&4) == 2 --> false
// (X&4) != 2 --> true
- Constant *Res = ConstantInt::get(CI.getType(), isNE);
- return replaceInstUsesWith(CI, Res);
+ Constant *Res = ConstantInt::get(Zext.getType(), isNE);
+ return replaceInstUsesWith(Zext, Res);
}
uint32_t ShAmt = KnownZeroMask.logBase2();
- Value *In = ICI->getOperand(0);
+ Value *In = Cmp->getOperand(0);
if (ShAmt) {
// Perform a logical shr by shiftamt.
// Insert the shift to put the result in the low bit.
@@ -912,11 +913,11 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
In = Builder.CreateXor(In, One);
}
- if (CI.getType() == In->getType())
- return replaceInstUsesWith(CI, In);
+ if (Zext.getType() == In->getType())
+ return replaceInstUsesWith(Zext, In);
- Value *IntCast = Builder.CreateIntCast(In, CI.getType(), false);
- return replaceInstUsesWith(CI, IntCast);
+ Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
+ return replaceInstUsesWith(Zext, IntCast);
}
}
}
@@ -924,19 +925,19 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
// icmp ne A, B is equal to xor A, B when A and B only really have one bit.
// It is also profitable to transform icmp eq into not(xor(A, B)) because that
// may lead to additional simplifications.
- if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) {
- if (IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) {
- Value *LHS = ICI->getOperand(0);
- Value *RHS = ICI->getOperand(1);
+ if (Cmp->isEquality() && Zext.getType() == Cmp->getOperand(0)->getType()) {
+ if (IntegerType *ITy = dyn_cast<IntegerType>(Zext.getType())) {
+ Value *LHS = Cmp->getOperand(0);
+ Value *RHS = Cmp->getOperand(1);
- KnownBits KnownLHS = computeKnownBits(LHS, 0, &CI);
- KnownBits KnownRHS = computeKnownBits(RHS, 0, &CI);
+ KnownBits KnownLHS = computeKnownBits(LHS, 0, &Zext);
+ KnownBits KnownRHS = computeKnownBits(RHS, 0, &Zext);
if (KnownLHS.Zero == KnownRHS.Zero && KnownLHS.One == KnownRHS.One) {
APInt KnownBits = KnownLHS.Zero | KnownLHS.One;
APInt UnknownBit = ~KnownBits;
if (UnknownBit.countPopulation() == 1) {
- if (!DoTransform) return ICI;
+ if (!DoTransform) return Cmp;
Value *Result = Builder.CreateXor(LHS, RHS);
@@ -949,10 +950,10 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
Result = Builder.CreateLShr(
Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));
- if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
Result = Builder.CreateXor(Result, ConstantInt::get(ITy, 1));
- Result->takeName(ICI);
- return replaceInstUsesWith(CI, Result);
+ Result->takeName(Cmp);
+ return replaceInstUsesWith(Zext, Result);
}
}
}
@@ -1172,8 +1173,8 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
}
}
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
- return transformZExtICmp(ICI, CI);
+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Src))
+ return transformZExtICmp(Cmp, CI);
BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
if (SrcI && SrcI->getOpcode() == Instruction::Or) {
@@ -1188,7 +1189,9 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// zext (or icmp, icmp) -> or (zext icmp), (zext icmp)
Value *LCast = Builder.CreateZExt(LHS, CI.getType(), LHS->getName());
Value *RCast = Builder.CreateZExt(RHS, CI.getType(), RHS->getName());
- BinaryOperator *Or = BinaryOperator::Create(Instruction::Or, LCast, RCast);
+ Value *Or = Builder.CreateOr(LCast, RCast, CI.getName());
+ if (auto *OrInst = dyn_cast<Instruction>(Or))
+ Builder.SetInsertPoint(OrInst);
// Perform the elimination.
if (auto *LZExt = dyn_cast<ZExtInst>(LCast))
@@ -1196,7 +1199,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
if (auto *RZExt = dyn_cast<ZExtInst>(RCast))
transformZExtICmp(RHS, *RZExt);
- return Or;
+ return replaceInstUsesWith(CI, Or);
}
}
@@ -1621,6 +1624,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
Value *X;
Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
if (Op && Op->hasOneUse()) {
+ // FIXME: The FMF should propagate from the fptrunc, not the source op.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ if (isa<FPMathOperator>(Op))
+ Builder.setFastMathFlags(Op->getFastMathFlags());
+
if (match(Op, m_FNeg(m_Value(X)))) {
Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
@@ -1630,6 +1638,24 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
return BinaryOperator::CreateFNegFMF(InnerTrunc, Op);
return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
}
+
+ // If we are truncating a select that has an extended operand, we can
+ // narrow the other operand and do the select as a narrow op.
+ Value *Cond, *X, *Y;
+ if (match(Op, m_Select(m_Value(Cond), m_FPExt(m_Value(X)), m_Value(Y))) &&
+ X->getType() == Ty) {
+ // fptrunc (select Cond, (fpext X), Y --> select Cond, X, (fptrunc Y)
+ Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
+ Value *Sel = Builder.CreateSelect(Cond, X, NarrowY, "narrow.sel", Op);
+ return replaceInstUsesWith(FPT, Sel);
+ }
+ if (match(Op, m_Select(m_Value(Cond), m_Value(Y), m_FPExt(m_Value(X)))) &&
+ X->getType() == Ty) {
+ // fptrunc (select Cond, Y, (fpext X) --> select Cond, (fptrunc Y), X
+ Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
+ Value *Sel = Builder.CreateSelect(Cond, NarrowY, X, "narrow.sel", Op);
+ return replaceInstUsesWith(FPT, Sel);
+ }
}
if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
@@ -1808,7 +1834,7 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
Type *Ty = CI.getType();
unsigned AS = CI.getPointerAddressSpace();
- if (Ty->getScalarSizeInBits() == DL.getIndexSizeInBits(AS))
+ if (Ty->getScalarSizeInBits() == DL.getPointerSizeInBits(AS))
return commonPointerCastTransforms(CI);
Type *PtrTy = DL.getIntPtrType(CI.getContext(), AS);
@@ -1820,12 +1846,24 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
}
/// This input value (which is known to have vector type) is being zero extended
-/// or truncated to the specified vector type.
+/// or truncated to the specified vector type. Since the zext/trunc is done
+/// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
+/// endianness will impact which end of the vector that is extended or
+/// truncated.
+///
+/// A vector is always stored with index 0 at the lowest address, which
+/// corresponds to the most significant bits for a big endian stored integer and
+/// the least significant bits for little endian. A trunc/zext of an integer
+/// impacts the big end of the integer. Thus, we need to add/remove elements at
+/// the front of the vector for big endian targets, and the back of the vector
+/// for little endian targets.
+///
/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
///
/// The source and destination vector types may have different element types.
-static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy,
- InstCombiner &IC) {
+static Instruction *optimizeVectorResizeWithIntegerBitCasts(Value *InVal,
+ VectorType *DestTy,
+ InstCombiner &IC) {
// We can only do this optimization if the output is a multiple of the input
// element size, or the input is a multiple of the output element size.
// Convert the input type to have the same element type as the output.
@@ -1844,31 +1882,53 @@ static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy,
InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
}
+ bool IsBigEndian = IC.getDataLayout().isBigEndian();
+ unsigned SrcElts = SrcTy->getNumElements();
+ unsigned DestElts = DestTy->getNumElements();
+
+ assert(SrcElts != DestElts && "Element counts should be different.");
+
// Now that the element types match, get the shuffle mask and RHS of the
// shuffle to use, which depends on whether we're increasing or decreasing the
// size of the input.
- SmallVector<uint32_t, 16> ShuffleMask;
+ SmallVector<uint32_t, 16> ShuffleMaskStorage;
+ ArrayRef<uint32_t> ShuffleMask;
Value *V2;
- if (SrcTy->getNumElements() > DestTy->getNumElements()) {
- // If we're shrinking the number of elements, just shuffle in the low
- // elements from the input and use undef as the second shuffle input.
- V2 = UndefValue::get(SrcTy);
- for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
- ShuffleMask.push_back(i);
+ // Produce an identify shuffle mask for the src vector.
+ ShuffleMaskStorage.resize(SrcElts);
+ std::iota(ShuffleMaskStorage.begin(), ShuffleMaskStorage.end(), 0);
+ if (SrcElts > DestElts) {
+ // If we're shrinking the number of elements (rewriting an integer
+ // truncate), just shuffle in the elements corresponding to the least
+ // significant bits from the input and use undef as the second shuffle
+ // input.
+ V2 = UndefValue::get(SrcTy);
+ // Make sure the shuffle mask selects the "least significant bits" by
+ // keeping elements from back of the src vector for big endian, and from the
+ // front for little endian.
+ ShuffleMask = ShuffleMaskStorage;
+ if (IsBigEndian)
+ ShuffleMask = ShuffleMask.take_back(DestElts);
+ else
+ ShuffleMask = ShuffleMask.take_front(DestElts);
} else {
- // If we're increasing the number of elements, shuffle in all of the
- // elements from InVal and fill the rest of the result elements with zeros
- // from a constant zero.
+ // If we're increasing the number of elements (rewriting an integer zext),
+ // shuffle in all of the elements from InVal. Fill the rest of the result
+ // elements with zeros from a constant zero.
V2 = Constant::getNullValue(SrcTy);
- unsigned SrcElts = SrcTy->getNumElements();
- for (unsigned i = 0, e = SrcElts; i != e; ++i)
- ShuffleMask.push_back(i);
-
- // The excess elements reference the first element of the zero input.
- for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i)
- ShuffleMask.push_back(SrcElts);
+ // Use first elt from V2 when indicating zero in the shuffle mask.
+ uint32_t NullElt = SrcElts;
+ // Extend with null values in the "most significant bits" by adding elements
+ // in front of the src vector for big endian, and at the back for little
+ // endian.
+ unsigned DeltaElts = DestElts - SrcElts;
+ if (IsBigEndian)
+ ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
+ else
+ ShuffleMaskStorage.append(DeltaElts, NullElt);
+ ShuffleMask = ShuffleMaskStorage;
}
return new ShuffleVectorInst(InVal, V2,
@@ -2217,6 +2277,31 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
}
}
+ // Check that each user of each old PHI node is something that we can
+ // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
+ for (auto *OldPN : OldPhiNodes) {
+ for (User *V : OldPN->users()) {
+ if (auto *SI = dyn_cast<StoreInst>(V)) {
+ if (!SI->isSimple() || SI->getOperand(0) != OldPN)
+ return nullptr;
+ } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
+ // Verify it's a B->A cast.
+ Type *TyB = BCI->getOperand(0)->getType();
+ Type *TyA = BCI->getType();
+ if (TyA != DestTy || TyB != SrcTy)
+ return nullptr;
+ } else if (auto *PHI = dyn_cast<PHINode>(V)) {
+ // As long as the user is another old PHI node, then even if we don't
+ // rewrite it, the PHI web we're considering won't have any users
+ // outside itself, so it'll be dead.
+ if (OldPhiNodes.count(PHI) == 0)
+ return nullptr;
+ } else {
+ return nullptr;
+ }
+ }
+ }
+
// For each old PHI node, create a corresponding new PHI node with a type A.
SmallDenseMap<PHINode *, PHINode *> NewPNodes;
for (auto *OldPN : OldPhiNodes) {
@@ -2234,9 +2319,14 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
if (auto *C = dyn_cast<Constant>(V)) {
NewV = ConstantExpr::getBitCast(C, DestTy);
} else if (auto *LI = dyn_cast<LoadInst>(V)) {
- Builder.SetInsertPoint(LI->getNextNode());
- NewV = Builder.CreateBitCast(LI, DestTy);
- Worklist.Add(LI);
+ // Explicitly perform load combine to make sure no opposing transform
+ // can remove the bitcast in the meantime and trigger an infinite loop.
+ Builder.SetInsertPoint(LI);
+ NewV = combineLoadToNewType(*LI, DestTy);
+ // Remove the old load and its use in the old phi, which itself becomes
+ // dead once the whole transform finishes.
+ replaceInstUsesWith(*LI, UndefValue::get(LI->getType()));
+ eraseInstFromFunction(*LI);
} else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
NewV = BCI->getOperand(0);
} else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
@@ -2259,26 +2349,33 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
Instruction *RetVal = nullptr;
for (auto *OldPN : OldPhiNodes) {
PHINode *NewPN = NewPNodes[OldPN];
- for (User *V : OldPN->users()) {
+ for (auto It = OldPN->user_begin(), End = OldPN->user_end(); It != End; ) {
+ User *V = *It;
+ // We may remove this user, advance to avoid iterator invalidation.
+ ++It;
if (auto *SI = dyn_cast<StoreInst>(V)) {
- if (SI->isSimple() && SI->getOperand(0) == OldPN) {
- Builder.SetInsertPoint(SI);
- auto *NewBC =
- cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
- SI->setOperand(0, NewBC);
- Worklist.Add(SI);
- assert(hasStoreUsersOnly(*NewBC));
- }
+ assert(SI->isSimple() && SI->getOperand(0) == OldPN);
+ Builder.SetInsertPoint(SI);
+ auto *NewBC =
+ cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
+ SI->setOperand(0, NewBC);
+ Worklist.Add(SI);
+ assert(hasStoreUsersOnly(*NewBC));
}
else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
- // Verify it's a B->A cast.
Type *TyB = BCI->getOperand(0)->getType();
Type *TyA = BCI->getType();
- if (TyA == DestTy && TyB == SrcTy) {
- Instruction *I = replaceInstUsesWith(*BCI, NewPN);
- if (BCI == &CI)
- RetVal = I;
- }
+ assert(TyA == DestTy && TyB == SrcTy);
+ (void) TyA;
+ (void) TyB;
+ Instruction *I = replaceInstUsesWith(*BCI, NewPN);
+ if (BCI == &CI)
+ RetVal = I;
+ } else if (auto *PHI = dyn_cast<PHINode>(V)) {
+ assert(OldPhiNodes.count(PHI) > 0);
+ (void) PHI;
+ } else {
+ llvm_unreachable("all uses should be handled");
}
}
}
@@ -2374,8 +2471,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
CastInst *SrcCast = cast<CastInst>(Src);
if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
if (isa<VectorType>(BCIn->getOperand(0)->getType()))
- if (Instruction *I = optimizeVectorResize(BCIn->getOperand(0),
- cast<VectorType>(DestTy), *this))
+ if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
+ BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
return I;
}