Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp')
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp  |  314
1 file changed, 49 insertions(+), 265 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 3f851a2b2182..5c84f666616d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -25,166 +25,6 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
-/// Analyze 'Val', seeing if it is a simple linear expression.
-/// If so, decompose it, returning some value X, such that Val is
-/// X*Scale+Offset.
-///
-static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
- uint64_t &Offset) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
- Offset = CI->getZExtValue();
- Scale = 0;
- return ConstantInt::get(Val->getType(), 0);
- }
-
- if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
- // Cannot look past anything that might overflow.
- // We specifically require nuw because we store the Scale in an unsigned
- // and perform an unsigned divide on it.
- OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
- if (OBI && !OBI->hasNoUnsignedWrap()) {
- Scale = 1;
- Offset = 0;
- return Val;
- }
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
- if (I->getOpcode() == Instruction::Shl) {
- // This is a value scaled by '1 << the shift amt'.
- Scale = UINT64_C(1) << RHS->getZExtValue();
- Offset = 0;
- return I->getOperand(0);
- }
-
- if (I->getOpcode() == Instruction::Mul) {
- // This value is scaled by 'RHS'.
- Scale = RHS->getZExtValue();
- Offset = 0;
- return I->getOperand(0);
- }
-
- if (I->getOpcode() == Instruction::Add) {
- // We have X+C. Check to see if we really have (X*C2)+C1,
- // where C1 is divisible by C2.
- unsigned SubScale;
- Value *SubVal =
- decomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
- Offset += RHS->getZExtValue();
- Scale = SubScale;
- return SubVal;
- }
- }
- }
-
- // Otherwise, we can't look past this.
- Scale = 1;
- Offset = 0;
- return Val;
-}
-
-/// If we find a cast of an allocation instruction, try to eliminate the cast by
-/// moving the type information into the alloc.
-Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI,
- AllocaInst &AI) {
- PointerType *PTy = cast<PointerType>(CI.getType());
- // Opaque pointers don't have an element type we could replace with.
- if (PTy->isOpaque())
- return nullptr;
-
- IRBuilderBase::InsertPointGuard Guard(Builder);
- Builder.SetInsertPoint(&AI);
-
- // Get the type really allocated and the type casted to.
- Type *AllocElTy = AI.getAllocatedType();
- Type *CastElTy = PTy->getNonOpaquePointerElementType();
- if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
-
- // This optimisation does not work for cases where the cast type
- // is scalable and the allocated type is not. This because we need to
- // know how many times the casted type fits into the allocated type.
- // For the opposite case where the allocated type is scalable and the
- // cast type is not this leads to poor code quality due to the
- // introduction of 'vscale' into the calculations. It seems better to
- // bail out for this case too until we've done a proper cost-benefit
- // analysis.
- bool AllocIsScalable = isa<ScalableVectorType>(AllocElTy);
- bool CastIsScalable = isa<ScalableVectorType>(CastElTy);
- if (AllocIsScalable != CastIsScalable) return nullptr;
-
- Align AllocElTyAlign = DL.getABITypeAlign(AllocElTy);
- Align CastElTyAlign = DL.getABITypeAlign(CastElTy);
- if (CastElTyAlign < AllocElTyAlign) return nullptr;
-
- // If the allocation has multiple uses, only promote it if we are strictly
- // increasing the alignment of the resultant allocation. If we keep it the
- // same, we open the door to infinite loops of various kinds.
- if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;
-
- // The alloc and cast types should be either both fixed or both scalable.
- uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy).getKnownMinValue();
- uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy).getKnownMinValue();
- if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;
-
- // If the allocation has multiple uses, only promote it if we're not
- // shrinking the amount of memory being allocated.
- uint64_t AllocElTyStoreSize =
- DL.getTypeStoreSize(AllocElTy).getKnownMinValue();
- uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy).getKnownMinValue();
- if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;
-
- // See if we can satisfy the modulus by pulling a scale out of the array
- // size argument.
- unsigned ArraySizeScale;
- uint64_t ArrayOffset;
- Value *NumElements = // See if the array size is a decomposable linear expr.
- decomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
-
- // If we can now satisfy the modulus, by using a non-1 scale, we really can
- // do the xform.
- if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
- (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return nullptr;
-
- // We don't currently support arrays of scalable types.
- assert(!AllocIsScalable || (ArrayOffset == 1 && ArraySizeScale == 0));
-
- unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
- Value *Amt = nullptr;
- if (Scale == 1) {
- Amt = NumElements;
- } else {
- Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
- // Insert before the alloca, not before the cast.
- Amt = Builder.CreateMul(Amt, NumElements);
- }
-
- if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
- Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
- Offset, true);
- Amt = Builder.CreateAdd(Amt, Off);
- }
-
- AllocaInst *New = Builder.CreateAlloca(CastElTy, AI.getAddressSpace(), Amt);
- New->setAlignment(AI.getAlign());
- New->takeName(&AI);
- New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
- New->setMetadata(LLVMContext::MD_DIAssignID,
- AI.getMetadata(LLVMContext::MD_DIAssignID));
-
- replaceAllDbgUsesWith(AI, *New, *New, DT);
-
- // If the allocation has multiple real uses, insert a cast and change all
- // things that used it to use the new cast. This will also hack on CI, but it
- // will die soon.
- if (!AI.hasOneUse()) {
- // New is the allocation instruction, pointer typed. AI is the original
- // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
- Value *NewCast = Builder.CreateBitCast(New, AI.getType(), "tmpcast");
- replaceInstUsesWith(AI, NewCast);
- eraseInstFromFunction(AI);
- }
- return replaceInstUsesWith(CI, New);
-}
-
/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
/// true for, actually insert the code to evaluate the expression.
Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
@@ -252,6 +92,20 @@ Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
Res = CastInst::Create(
static_cast<Instruction::CastOps>(Opc), I->getOperand(0), Ty);
break;
+ case Instruction::Call:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unsupported call!");
+ case Intrinsic::vscale: {
+ Function *Fn =
+ Intrinsic::getDeclaration(I->getModule(), Intrinsic::vscale, {Ty});
+ Res = CallInst::Create(Fn->getFunctionType(), Fn);
+ break;
+ }
+ }
+ }
+ break;
default:
// TODO: Can handle more cases here.
llvm_unreachable("Unreachable!");
@@ -294,6 +148,10 @@ Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
Value *Src = CI.getOperand(0);
Type *Ty = CI.getType();
+ if (auto *SrcC = dyn_cast<Constant>(Src))
+ if (Constant *Res = ConstantFoldCastOperand(CI.getOpcode(), SrcC, Ty, DL))
+ return replaceInstUsesWith(CI, Res);
+
// Try to eliminate a cast of a cast.
if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
@@ -501,16 +359,12 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
// If the integer type can hold the max FP value, it is safe to cast
// directly to that type. Otherwise, we may create poison via overflow
// that did not exist in the original code.
- //
- // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
- // at least one more bit than the MaxExponent to hold the max FP value.
Type *InputTy = I->getOperand(0)->getType()->getScalarType();
const fltSemantics &Semantics = InputTy->getFltSemantics();
- uint32_t MinBitWidth = APFloatBase::semanticsMaxExponent(Semantics);
- // Extra sign bit needed.
- if (I->getOpcode() == Instruction::FPToSI)
- ++MinBitWidth;
- return Ty->getScalarSizeInBits() > MinBitWidth;
+ uint32_t MinBitWidth =
+ APFloatBase::semanticsIntSizeInBits(Semantics,
+ I->getOpcode() == Instruction::FPToSI);
+ return Ty->getScalarSizeInBits() >= MinBitWidth;
}
default:
// TODO: Can handle more cases here.
@@ -881,13 +735,12 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
Value *And = Builder.CreateAnd(X, MaskC);
return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
}
- if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_Constant(C)),
+ if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_ImmConstant(C)),
m_Deferred(X))))) {
// trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
Constant *MaskC = ConstantExpr::getShl(One, C);
- MaskC = ConstantExpr::getOr(MaskC, One);
- Value *And = Builder.CreateAnd(X, MaskC);
+ Value *And = Builder.CreateAnd(X, Builder.CreateOr(MaskC, One));
return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
}
}
@@ -904,11 +757,18 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
// removed by the trunc.
if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
APInt(SrcWidth, MaxShiftAmt)))) {
+ auto GetNewShAmt = [&](unsigned Width) {
+ Constant *MaxAmt = ConstantInt::get(SrcTy, Width - 1, false);
+ Constant *Cmp =
+ ConstantFoldCompareInstOperands(ICmpInst::ICMP_ULT, C, MaxAmt, DL);
+ Constant *ShAmt = ConstantFoldSelectInstruction(Cmp, C, MaxAmt);
+ return ConstantFoldCastOperand(Instruction::Trunc, ShAmt, A->getType(),
+ DL);
+ };
+
// trunc (lshr (sext A), C) --> ashr A, C
if (A->getType() == DestTy) {
- Constant *MaxAmt = ConstantInt::get(SrcTy, DestWidth - 1, false);
- Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
- ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
+ Constant *ShAmt = GetNewShAmt(DestWidth);
ShAmt = Constant::mergeUndefsWith(ShAmt, C);
return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
: BinaryOperator::CreateAShr(A, ShAmt);
@@ -916,9 +776,7 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
// The types are mismatched, so create a cast after shifting:
// trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
if (Src->hasOneUse()) {
- Constant *MaxAmt = ConstantInt::get(SrcTy, AWidth - 1, false);
- Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
- ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
+ Constant *ShAmt = GetNewShAmt(AWidth);
Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
return CastInst::CreateIntegerCast(Shift, DestTy, true);
}
@@ -998,7 +856,7 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
}
}
- if (match(Src, m_VScale(DL))) {
+ if (match(Src, m_VScale())) {
if (Trunc.getFunction() &&
Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
Attribute Attr =
@@ -1217,6 +1075,13 @@ static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
return false;
return true;
}
+ case Instruction::Call:
+ // llvm.vscale() can always be executed in larger type, because the
+ // value is automatically zero-extended.
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ if (II->getIntrinsicID() == Intrinsic::vscale)
+ return true;
+ return false;
default:
// TODO: Can handle more cases here.
return false;
@@ -1226,7 +1091,8 @@ static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
// If this zero extend is only used by a truncate, let the truncate be
// eliminated before we try to optimize this zext.
- if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()))
+ if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()) &&
+ !isa<Constant>(Zext.getOperand(0)))
return nullptr;
// If one of the common conversion will work, do it.
@@ -1340,7 +1206,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
return BinaryOperator::CreateAnd(X, ZextC);
}
- if (match(Src, m_VScale(DL))) {
+ if (match(Src, m_VScale())) {
if (Zext.getFunction() &&
Zext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
Attribute Attr =
@@ -1402,7 +1268,7 @@ Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
// sext ((x & 2^n) == 0) -> (x >> n) - 1
// sext ((x & 2^n) != 2^n) -> (x >> n) - 1
- unsigned ShiftAmt = KnownZeroMask.countTrailingZeros();
+ unsigned ShiftAmt = KnownZeroMask.countr_zero();
// Perform a right shift to place the desired bit in the LSB.
if (ShiftAmt)
In = Builder.CreateLShr(In,
@@ -1416,7 +1282,7 @@ Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
} else {
// sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
// sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
- unsigned ShiftAmt = KnownZeroMask.countLeadingZeros();
+ unsigned ShiftAmt = KnownZeroMask.countl_zero();
// Perform a left shift to place the desired bit in the MSB.
if (ShiftAmt)
In = Builder.CreateShl(In,
@@ -1611,7 +1477,7 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
}
}
- if (match(Src, m_VScale(DL))) {
+ if (match(Src, m_VScale())) {
if (Sext.getFunction() &&
Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
Attribute Attr =
@@ -2687,57 +2553,6 @@ Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
return RetVal;
}
-static Instruction *convertBitCastToGEP(BitCastInst &CI, IRBuilderBase &Builder,
- const DataLayout &DL) {
- Value *Src = CI.getOperand(0);
- PointerType *SrcPTy = cast<PointerType>(Src->getType());
- PointerType *DstPTy = cast<PointerType>(CI.getType());
-
- // Bitcasts involving opaque pointers cannot be converted into a GEP.
- if (SrcPTy->isOpaque() || DstPTy->isOpaque())
- return nullptr;
-
- Type *DstElTy = DstPTy->getNonOpaquePointerElementType();
- Type *SrcElTy = SrcPTy->getNonOpaquePointerElementType();
-
- // When the type pointed to is not sized the cast cannot be
- // turned into a gep.
- if (!SrcElTy->isSized())
- return nullptr;
-
- // If the source and destination are pointers, and this cast is equivalent
- // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
- // This can enhance SROA and other transforms that want type-safe pointers.
- unsigned NumZeros = 0;
- while (SrcElTy && SrcElTy != DstElTy) {
- SrcElTy = GetElementPtrInst::getTypeAtIndex(SrcElTy, (uint64_t)0);
- ++NumZeros;
- }
-
- // If we found a path from the src to dest, create the getelementptr now.
- if (SrcElTy == DstElTy) {
- SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder.getInt32(0));
- GetElementPtrInst *GEP = GetElementPtrInst::Create(
- SrcPTy->getNonOpaquePointerElementType(), Src, Idxs);
-
- // If the source pointer is dereferenceable, then assume it points to an
- // allocated object and apply "inbounds" to the GEP.
- bool CanBeNull, CanBeFreed;
- if (Src->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed)) {
- // In a non-default address space (not 0), a null pointer can not be
- // assumed inbounds, so ignore that case (dereferenceable_or_null).
- // The reason is that 'null' is not treated differently in these address
- // spaces, and we consequently ignore the 'gep inbounds' special case
- // for 'null' which allows 'inbounds' on 'null' if the indices are
- // zeros.
- if (SrcPTy->getAddressSpace() == 0 || !CanBeNull)
- GEP->setIsInBounds();
- }
- return GEP;
- }
- return nullptr;
-}
-
Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
// If the operands are integer typed then apply the integer transforms,
// otherwise just apply the common ones.
@@ -2750,19 +2565,6 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
if (DestTy == Src->getType())
return replaceInstUsesWith(CI, Src);
- if (isa<PointerType>(SrcTy) && isa<PointerType>(DestTy)) {
- // If we are casting a alloca to a pointer to a type of the same
- // size, rewrite the allocation instruction to allocate the "right" type.
- // There is no need to modify malloc calls because it is their bitcast that
- // needs to be cleaned up.
- if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
- if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
- return V;
-
- if (Instruction *I = convertBitCastToGEP(CI, Builder, DL))
- return I;
- }
-
if (FixedVectorType *DestVTy = dyn_cast<FixedVectorType>(DestTy)) {
// Beware: messing with this target-specific oddity may cause trouble.
if (DestVTy->getNumElements() == 1 && SrcTy->isX86_MMXTy()) {
@@ -2905,23 +2707,5 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
}
Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
- // If the destination pointer element type is not the same as the source's
- // first do a bitcast to the destination type, and then the addrspacecast.
- // This allows the cast to be exposed to other transforms.
- Value *Src = CI.getOperand(0);
- PointerType *SrcTy = cast<PointerType>(Src->getType()->getScalarType());
- PointerType *DestTy = cast<PointerType>(CI.getType()->getScalarType());
-
- if (!SrcTy->hasSameElementTypeAs(DestTy)) {
- Type *MidTy =
- PointerType::getWithSamePointeeType(DestTy, SrcTy->getAddressSpace());
- // Handle vectors of pointers.
- if (VectorType *VT = dyn_cast<VectorType>(CI.getType()))
- MidTy = VectorType::get(MidTy, VT->getElementCount());
-
- Value *NewBitCast = Builder.CreateBitCast(Src, MidTy);
- return new AddrSpaceCastInst(NewBitCast, CI.getType());
- }
-
return commonPointerCastTransforms(CI);
}