Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 314 |
1 file changed, 49 insertions, 265 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 3f851a2b2182..5c84f666616d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -25,166 +25,6 @@ using namespace PatternMatch;
 
 #define DEBUG_TYPE "instcombine"
 
-/// Analyze 'Val', seeing if it is a simple linear expression.
-/// If so, decompose it, returning some value X, such that Val is
-/// X*Scale+Offset.
-///
-static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
-                                        uint64_t &Offset) {
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
-    Offset = CI->getZExtValue();
-    Scale = 0;
-    return ConstantInt::get(Val->getType(), 0);
-  }
-
-  if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
-    // Cannot look past anything that might overflow.
-    // We specifically require nuw because we store the Scale in an unsigned
-    // and perform an unsigned divide on it.
-    OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
-    if (OBI && !OBI->hasNoUnsignedWrap()) {
-      Scale = 1;
-      Offset = 0;
-      return Val;
-    }
-
-    if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
-      if (I->getOpcode() == Instruction::Shl) {
-        // This is a value scaled by '1 << the shift amt'.
-        Scale = UINT64_C(1) << RHS->getZExtValue();
-        Offset = 0;
-        return I->getOperand(0);
-      }
-
-      if (I->getOpcode() == Instruction::Mul) {
-        // This value is scaled by 'RHS'.
-        Scale = RHS->getZExtValue();
-        Offset = 0;
-        return I->getOperand(0);
-      }
-
-      if (I->getOpcode() == Instruction::Add) {
-        // We have X+C. Check to see if we really have (X*C2)+C1,
-        // where C1 is divisible by C2.
-        unsigned SubScale;
-        Value *SubVal =
-            decomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
-        Offset += RHS->getZExtValue();
-        Scale = SubScale;
-        return SubVal;
-      }
-    }
-  }
-
-  // Otherwise, we can't look past this.
-  Scale = 1;
-  Offset = 0;
-  return Val;
-}
-
-/// If we find a cast of an allocation instruction, try to eliminate the cast by
-/// moving the type information into the alloc.
-Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI,
-                                                       AllocaInst &AI) {
-  PointerType *PTy = cast<PointerType>(CI.getType());
-  // Opaque pointers don't have an element type we could replace with.
-  if (PTy->isOpaque())
-    return nullptr;
-
-  IRBuilderBase::InsertPointGuard Guard(Builder);
-  Builder.SetInsertPoint(&AI);
-
-  // Get the type really allocated and the type casted to.
-  Type *AllocElTy = AI.getAllocatedType();
-  Type *CastElTy = PTy->getNonOpaquePointerElementType();
-  if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
-
-  // This optimisation does not work for cases where the cast type
-  // is scalable and the allocated type is not. This because we need to
-  // know how many times the casted type fits into the allocated type.
-  // For the opposite case where the allocated type is scalable and the
-  // cast type is not this leads to poor code quality due to the
-  // introduction of 'vscale' into the calculations. It seems better to
-  // bail out for this case too until we've done a proper cost-benefit
-  // analysis.
-  bool AllocIsScalable = isa<ScalableVectorType>(AllocElTy);
-  bool CastIsScalable = isa<ScalableVectorType>(CastElTy);
-  if (AllocIsScalable != CastIsScalable) return nullptr;
-
-  Align AllocElTyAlign = DL.getABITypeAlign(AllocElTy);
-  Align CastElTyAlign = DL.getABITypeAlign(CastElTy);
-  if (CastElTyAlign < AllocElTyAlign) return nullptr;
-
-  // If the allocation has multiple uses, only promote it if we are strictly
-  // increasing the alignment of the resultant allocation. If we keep it the
-  // same, we open the door to infinite loops of various kinds.
-  if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;
-
-  // The alloc and cast types should be either both fixed or both scalable.
-  uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy).getKnownMinValue();
-  uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy).getKnownMinValue();
-  if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;
-
-  // If the allocation has multiple uses, only promote it if we're not
-  // shrinking the amount of memory being allocated.
-  uint64_t AllocElTyStoreSize =
-      DL.getTypeStoreSize(AllocElTy).getKnownMinValue();
-  uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy).getKnownMinValue();
-  if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;
-
-  // See if we can satisfy the modulus by pulling a scale out of the array
-  // size argument.
-  unsigned ArraySizeScale;
-  uint64_t ArrayOffset;
-  Value *NumElements = // See if the array size is a decomposable linear expr.
-      decomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
-
-  // If we can now satisfy the modulus, by using a non-1 scale, we really can
-  // do the xform.
-  if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
-      (AllocElTySize*ArrayOffset) % CastElTySize != 0) return nullptr;
-
-  // We don't currently support arrays of scalable types.
-  assert(!AllocIsScalable || (ArrayOffset == 1 && ArraySizeScale == 0));
-
-  unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
-  Value *Amt = nullptr;
-  if (Scale == 1) {
-    Amt = NumElements;
-  } else {
-    Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
-    // Insert before the alloca, not before the cast.
-    Amt = Builder.CreateMul(Amt, NumElements);
-  }
-
-  if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
-    Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
-                                  Offset, true);
-    Amt = Builder.CreateAdd(Amt, Off);
-  }
-
-  AllocaInst *New = Builder.CreateAlloca(CastElTy, AI.getAddressSpace(), Amt);
-  New->setAlignment(AI.getAlign());
-  New->takeName(&AI);
-  New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
-  New->setMetadata(LLVMContext::MD_DIAssignID,
-                   AI.getMetadata(LLVMContext::MD_DIAssignID));
-
-  replaceAllDbgUsesWith(AI, *New, *New, DT);
-
-  // If the allocation has multiple real uses, insert a cast and change all
-  // things that used it to use the new cast. This will also hack on CI, but it
-  // will die soon.
-  if (!AI.hasOneUse()) {
-    // New is the allocation instruction, pointer typed. AI is the original
-    // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
-    Value *NewCast = Builder.CreateBitCast(New, AI.getType(), "tmpcast");
-    replaceInstUsesWith(AI, NewCast);
-    eraseInstFromFunction(AI);
-  }
-  return replaceInstUsesWith(CI, New);
-}
-
 /// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
 /// true for, actually insert the code to evaluate the expression.
 Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
@@ -252,6 +92,20 @@ Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
     Res = CastInst::Create(
         static_cast<Instruction::CastOps>(Opc), I->getOperand(0), Ty);
     break;
+  case Instruction::Call:
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+      switch (II->getIntrinsicID()) {
+      default:
+        llvm_unreachable("Unsupported call!");
+      case Intrinsic::vscale: {
+        Function *Fn =
+            Intrinsic::getDeclaration(I->getModule(), Intrinsic::vscale, {Ty});
+        Res = CallInst::Create(Fn->getFunctionType(), Fn);
+        break;
+      }
+      }
+    }
+    break;
   default:
     // TODO: Can handle more cases here.
     llvm_unreachable("Unreachable!");
@@ -294,6 +148,10 @@ Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
   Value *Src = CI.getOperand(0);
   Type *Ty = CI.getType();
 
+  if (auto *SrcC = dyn_cast<Constant>(Src))
+    if (Constant *Res = ConstantFoldCastOperand(CI.getOpcode(), SrcC, Ty, DL))
+      return replaceInstUsesWith(CI, Res);
+
   // Try to eliminate a cast of a cast.
   if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
     if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
@@ -501,16 +359,12 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
     // If the integer type can hold the max FP value, it is safe to cast
     // directly to that type. Otherwise, we may create poison via overflow
    // that did not exist in the original code.
-    //
-    // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
-    // at least one more bit than the MaxExponent to hold the max FP value.
     Type *InputTy = I->getOperand(0)->getType()->getScalarType();
     const fltSemantics &Semantics = InputTy->getFltSemantics();
-    uint32_t MinBitWidth = APFloatBase::semanticsMaxExponent(Semantics);
-    // Extra sign bit needed.
-    if (I->getOpcode() == Instruction::FPToSI)
-      ++MinBitWidth;
-    return Ty->getScalarSizeInBits() > MinBitWidth;
+    uint32_t MinBitWidth =
+        APFloatBase::semanticsIntSizeInBits(Semantics,
+            I->getOpcode() == Instruction::FPToSI);
+    return Ty->getScalarSizeInBits() >= MinBitWidth;
   }
   default:
     // TODO: Can handle more cases here.
@@ -881,13 +735,12 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
       Value *And = Builder.CreateAnd(X, MaskC);
       return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
     }
-    if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_Constant(C)),
+    if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_ImmConstant(C)),
                                    m_Deferred(X))))) {
      // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Constant *MaskC = ConstantExpr::getShl(One, C);
-      MaskC = ConstantExpr::getOr(MaskC, One);
-      Value *And = Builder.CreateAnd(X, MaskC);
+      Value *And = Builder.CreateAnd(X, Builder.CreateOr(MaskC, One));
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
  }
@@ -904,11 +757,18 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
    // removed by the trunc.
     if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                     APInt(SrcWidth, MaxShiftAmt)))) {
+      auto GetNewShAmt = [&](unsigned Width) {
+        Constant *MaxAmt = ConstantInt::get(SrcTy, Width - 1, false);
+        Constant *Cmp =
+            ConstantFoldCompareInstOperands(ICmpInst::ICMP_ULT, C, MaxAmt, DL);
+        Constant *ShAmt = ConstantFoldSelectInstruction(Cmp, C, MaxAmt);
+        return ConstantFoldCastOperand(Instruction::Trunc, ShAmt, A->getType(),
+                                       DL);
+      };
+
       // trunc (lshr (sext A), C) --> ashr A, C
       if (A->getType() == DestTy) {
-        Constant *MaxAmt = ConstantInt::get(SrcTy, DestWidth - 1, false);
-        Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
-        ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
+        Constant *ShAmt = GetNewShAmt(DestWidth);
         ShAmt = Constant::mergeUndefsWith(ShAmt, C);
         return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
                        : BinaryOperator::CreateAShr(A, ShAmt);
@@ -916,9 +776,7 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
       // The types are mismatched, so create a cast after shifting:
       // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
       if (Src->hasOneUse()) {
-        Constant *MaxAmt = ConstantInt::get(SrcTy, AWidth - 1, false);
-        Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
-        ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
+        Constant *ShAmt = GetNewShAmt(AWidth);
         Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
         return CastInst::CreateIntegerCast(Shift, DestTy, true);
       }
@@ -998,7 +856,7 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
     }
   }
 
-  if (match(Src, m_VScale(DL))) {
+  if (match(Src, m_VScale())) {
     if (Trunc.getFunction() &&
         Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
       Attribute Attr =
@@ -1217,6 +1075,13 @@ static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
         return false;
     return true;
   }
+  case Instruction::Call:
+    // llvm.vscale() can always be executed in larger type, because the
+    // value is automatically zero-extended.
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+      if (II->getIntrinsicID() == Intrinsic::vscale)
+        return true;
+    return false;
   default:
     // TODO: Can handle more cases here.
     return false;
@@ -1226,7 +1091,8 @@ static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
 Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
   // If this zero extend is only used by a truncate, let the truncate be
   // eliminated before we try to optimize this zext.
-  if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()))
+  if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()) &&
+      !isa<Constant>(Zext.getOperand(0)))
     return nullptr;
 
   // If one of the common conversion will work, do it.
@@ -1340,7 +1206,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
     return BinaryOperator::CreateAnd(X, ZextC);
   }
 
-  if (match(Src, m_VScale(DL))) {
+  if (match(Src, m_VScale())) {
     if (Zext.getFunction() &&
         Zext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
       Attribute Attr =
@@ -1402,7 +1268,7 @@ Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
       if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
         // sext ((x & 2^n) == 0) -> (x >> n) - 1
         // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
-        unsigned ShiftAmt = KnownZeroMask.countTrailingZeros();
+        unsigned ShiftAmt = KnownZeroMask.countr_zero();
         // Perform a right shift to place the desired bit in the LSB.
         if (ShiftAmt)
           In = Builder.CreateLShr(In,
@@ -1416,7 +1282,7 @@ Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
       } else {
         // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
         // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
-        unsigned ShiftAmt = KnownZeroMask.countLeadingZeros();
+        unsigned ShiftAmt = KnownZeroMask.countl_zero();
         // Perform a left shift to place the desired bit in the MSB.
         if (ShiftAmt)
           In = Builder.CreateShl(In,
@@ -1611,7 +1477,7 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
     }
   }
 
-  if (match(Src, m_VScale(DL))) {
+  if (match(Src, m_VScale())) {
     if (Sext.getFunction() &&
         Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
       Attribute Attr =
@@ -2687,57 +2553,6 @@ Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
   return RetVal;
 }
 
-static Instruction *convertBitCastToGEP(BitCastInst &CI, IRBuilderBase &Builder,
-                                        const DataLayout &DL) {
-  Value *Src = CI.getOperand(0);
-  PointerType *SrcPTy = cast<PointerType>(Src->getType());
-  PointerType *DstPTy = cast<PointerType>(CI.getType());
-
-  // Bitcasts involving opaque pointers cannot be converted into a GEP.
-  if (SrcPTy->isOpaque() || DstPTy->isOpaque())
-    return nullptr;
-
-  Type *DstElTy = DstPTy->getNonOpaquePointerElementType();
-  Type *SrcElTy = SrcPTy->getNonOpaquePointerElementType();
-
-  // When the type pointed to is not sized the cast cannot be
-  // turned into a gep.
-  if (!SrcElTy->isSized())
-    return nullptr;
-
-  // If the source and destination are pointers, and this cast is equivalent
-  // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
-  // This can enhance SROA and other transforms that want type-safe pointers.
-  unsigned NumZeros = 0;
-  while (SrcElTy && SrcElTy != DstElTy) {
-    SrcElTy = GetElementPtrInst::getTypeAtIndex(SrcElTy, (uint64_t)0);
-    ++NumZeros;
-  }
-
-  // If we found a path from the src to dest, create the getelementptr now.
-  if (SrcElTy == DstElTy) {
-    SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder.getInt32(0));
-    GetElementPtrInst *GEP = GetElementPtrInst::Create(
-        SrcPTy->getNonOpaquePointerElementType(), Src, Idxs);
-
-    // If the source pointer is dereferenceable, then assume it points to an
-    // allocated object and apply "inbounds" to the GEP.
-    bool CanBeNull, CanBeFreed;
-    if (Src->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed)) {
-      // In a non-default address space (not 0), a null pointer can not be
-      // assumed inbounds, so ignore that case (dereferenceable_or_null).
-      // The reason is that 'null' is not treated differently in these address
-      // spaces, and we consequently ignore the 'gep inbounds' special case
-      // for 'null' which allows 'inbounds' on 'null' if the indices are
-      // zeros.
-      if (SrcPTy->getAddressSpace() == 0 || !CanBeNull)
-        GEP->setIsInBounds();
-    }
-    return GEP;
-  }
-  return nullptr;
-}
-
 Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
   // If the operands are integer typed then apply the integer transforms,
   // otherwise just apply the common ones.
@@ -2750,19 +2565,6 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
   if (DestTy == Src->getType())
     return replaceInstUsesWith(CI, Src);
 
-  if (isa<PointerType>(SrcTy) && isa<PointerType>(DestTy)) {
-    // If we are casting a alloca to a pointer to a type of the same
-    // size, rewrite the allocation instruction to allocate the "right" type.
-    // There is no need to modify malloc calls because it is their bitcast that
-    // needs to be cleaned up.
-    if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
-      if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
-        return V;
-
-    if (Instruction *I = convertBitCastToGEP(CI, Builder, DL))
-      return I;
-  }
-
   if (FixedVectorType *DestVTy = dyn_cast<FixedVectorType>(DestTy)) {
     // Beware: messing with this target-specific oddity may cause trouble.
     if (DestVTy->getNumElements() == 1 && SrcTy->isX86_MMXTy()) {
@@ -2905,23 +2707,5 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
 }
 
 Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
-  // If the destination pointer element type is not the same as the source's
-  // first do a bitcast to the destination type, and then the addrspacecast.
-  // This allows the cast to be exposed to other transforms.
-  Value *Src = CI.getOperand(0);
-  PointerType *SrcTy = cast<PointerType>(Src->getType()->getScalarType());
-  PointerType *DestTy = cast<PointerType>(CI.getType()->getScalarType());
-
-  if (!SrcTy->hasSameElementTypeAs(DestTy)) {
-    Type *MidTy =
-        PointerType::getWithSamePointeeType(DestTy, SrcTy->getAddressSpace());
-    // Handle vectors of pointers.
-    if (VectorType *VT = dyn_cast<VectorType>(CI.getType()))
-      MidTy = VectorType::get(MidTy, VT->getElementCount());
-
-    Value *NewBitCast = Builder.CreateBitCast(Src, MidTy);
-    return new AddrSpaceCastInst(NewBitCast, CI.getType());
-  }
-
   return commonPointerCastTransforms(CI);
 }