author    | Dimitry Andric <dim@FreeBSD.org> | 2023-02-11 12:38:04 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2023-02-11 12:38:11 +0000
commit    | e3b557809604d036af6e00c60f012c2025b59a5e (patch)
tree      | 8a11ba2269a3b669601e2fd41145b174008f4da8 /llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
parent    | 08e8dd7b9db7bb4a9de26d44c1cbfd24e869c014 (diff)
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 295
1 file changed, 150 insertions, 145 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index a9a930555b3c..3f851a2b2182 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -14,9 +14,12 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
+#include <optional>
+
 using namespace llvm;
 using namespace PatternMatch;
@@ -118,14 +121,15 @@ Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI,
   if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;
 
   // The alloc and cast types should be either both fixed or both scalable.
-  uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy).getKnownMinSize();
-  uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy).getKnownMinSize();
+  uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy).getKnownMinValue();
+  uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy).getKnownMinValue();
   if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;
 
   // If the allocation has multiple uses, only promote it if we're not
   // shrinking the amount of memory being allocated.
-  uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy).getKnownMinSize();
-  uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy).getKnownMinSize();
+  uint64_t AllocElTyStoreSize =
+      DL.getTypeStoreSize(AllocElTy).getKnownMinValue();
+  uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy).getKnownMinValue();
   if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;
 
   // See if we can satisfy the modulus by pulling a scale out of the array
@@ -163,6 +167,10 @@ Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI,
   New->setAlignment(AI.getAlign());
   New->takeName(&AI);
   New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
+  New->setMetadata(LLVMContext::MD_DIAssignID,
+                   AI.getMetadata(LLVMContext::MD_DIAssignID));
+
+  replaceAllDbgUsesWith(AI, *New, *New, DT);
 
   // If the allocation has multiple real uses, insert a cast and change all
   // things that used it to use the new cast. This will also hack on CI, but it
@@ -239,6 +247,11 @@ Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
     Res = NPN;
     break;
   }
+  case Instruction::FPToUI:
+  case Instruction::FPToSI:
+    Res = CastInst::Create(
+        static_cast<Instruction::CastOps>(Opc), I->getOperand(0), Ty);
+    break;
   default:
     // TODO: Can handle more cases here.
     llvm_unreachable("Unreachable!");
@@ -483,6 +496,22 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
       return false;
     return true;
   }
+  case Instruction::FPToUI:
+  case Instruction::FPToSI: {
+    // If the integer type can hold the max FP value, it is safe to cast
+    // directly to that type. Otherwise, we may create poison via overflow
+    // that did not exist in the original code.
+    //
+    // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
+    // at least one more bit than the MaxExponent to hold the max FP value.
+    Type *InputTy = I->getOperand(0)->getType()->getScalarType();
+    const fltSemantics &Semantics = InputTy->getFltSemantics();
+    uint32_t MinBitWidth = APFloatBase::semanticsMaxExponent(Semantics);
+    // Extra sign bit needed.
+    if (I->getOpcode() == Instruction::FPToSI)
+      ++MinBitWidth;
+    return Ty->getScalarSizeInBits() > MinBitWidth;
+  }
   default:
     // TODO: Can handle more cases here.
     break;
@@ -726,7 +755,7 @@ static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
                                        InstCombiner::BuilderTy &Builder) {
   auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
   if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
-      is_splat(Shuf->getShuffleMask()) &&
+      all_equal(Shuf->getShuffleMask()) &&
       Shuf->getType() == Shuf->getOperand(0)->getType()) {
     // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
     // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
@@ -974,7 +1003,7 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
       Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
     Attribute Attr =
         Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
-    if (Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
+    if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
       if (Log2_32(*MaxVScale) < DestWidth) {
         Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
         return replaceInstUsesWith(Trunc, VScale);
@@ -986,7 +1015,8 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
   return nullptr;
 }
 
-Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext) {
+Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
+                                                 ZExtInst &Zext) {
   // If we are just checking for a icmp eq of a single bit and zext'ing it
   // to an integer, then shift the bit to the appropriate place and then
   // cast to integer to avoid the comparison.
@@ -1014,28 +1044,20 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext)
 
     // zext (X == 0) to i32 --> X^1      iff X has only the low bit set.
     // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
-    // zext (X == 1) to i32 --> X        iff X has only the low bit set.
-    // zext (X == 2) to i32 --> X>>1     iff X has only the 2nd bit set.
    // zext (X != 0) to i32 --> X        iff X has only the low bit set.
    // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.
-    // zext (X != 1) to i32 --> X^1      iff X has only the low bit set.
-    // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
-    if ((Op1CV->isZero() || Op1CV->isPowerOf2()) &&
-        // This only works for EQ and NE
-        Cmp->isEquality()) {
+    if (Op1CV->isZero() && Cmp->isEquality() &&
+        (Cmp->getOperand(0)->getType() == Zext.getType() ||
+         Cmp->getPredicate() == ICmpInst::ICMP_NE)) {
       // If Op1C some other power of two, convert:
       KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);
 
+      // Exactly 1 possible 1? But not the high-bit because that is
+      // canonicalized to this form.
       APInt KnownZeroMask(~Known.Zero);
-      if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
-        bool isNE = Cmp->getPredicate() == ICmpInst::ICMP_NE;
-        if (!Op1CV->isZero() && (*Op1CV != KnownZeroMask)) {
-          // (X&4) == 2 --> false
-          // (X&4) != 2 --> true
-          Constant *Res = ConstantInt::get(Zext.getType(), isNE);
-          return replaceInstUsesWith(Zext, Res);
-        }
-
+      if (KnownZeroMask.isPowerOf2() &&
+          (Zext.getType()->getScalarSizeInBits() !=
+           KnownZeroMask.logBase2() + 1)) {
         uint32_t ShAmt = KnownZeroMask.logBase2();
         Value *In = Cmp->getOperand(0);
         if (ShAmt) {
@@ -1045,10 +1067,9 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext)
                                   In->getName() + ".lobit");
         }
 
-        if (!Op1CV->isZero() == isNE) { // Toggle the low bit.
-          Constant *One = ConstantInt::get(In->getType(), 1);
-          In = Builder.CreateXor(In, One);
-        }
+        // Toggle the low bit for "X == 0".
+        if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
+          In = Builder.CreateXor(In, ConstantInt::get(In->getType(), 1));
 
         if (Zext.getType() == In->getType())
           return replaceInstUsesWith(Zext, In);
@@ -1073,39 +1094,6 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext)
     Value *And1 = Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
     return replaceInstUsesWith(Zext, And1);
   }
-
-  // icmp ne A, B is equal to xor A, B when A and B only really have one bit.
-  // It is also profitable to transform icmp eq into not(xor(A, B)) because
-  // that may lead to additional simplifications.
-  if (IntegerType *ITy = dyn_cast<IntegerType>(Zext.getType())) {
-    Value *LHS = Cmp->getOperand(0);
-    Value *RHS = Cmp->getOperand(1);
-
-    KnownBits KnownLHS = computeKnownBits(LHS, 0, &Zext);
-    KnownBits KnownRHS = computeKnownBits(RHS, 0, &Zext);
-
-    if (KnownLHS == KnownRHS) {
-      APInt KnownBits = KnownLHS.Zero | KnownLHS.One;
-      APInt UnknownBit = ~KnownBits;
-      if (UnknownBit.countPopulation() == 1) {
-        Value *Result = Builder.CreateXor(LHS, RHS);
-
-        // Mask off any bits that are set and won't be shifted away.
-        if (KnownLHS.One.uge(UnknownBit))
-          Result = Builder.CreateAnd(Result,
-                                     ConstantInt::get(ITy, UnknownBit));
-
-        // Shift the bit we're testing down to the lsb.
-        Result = Builder.CreateLShr(
-            Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));
-
-        if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
-          Result = Builder.CreateXor(Result, ConstantInt::get(ITy, 1));
-        Result->takeName(Cmp);
-        return replaceInstUsesWith(Zext, Result);
-      }
-    }
-  }
 
   return nullptr;
@@ -1235,23 +1223,23 @@ static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
   }
 }
 
-Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
+Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
   // If this zero extend is only used by a truncate, let the truncate be
   // eliminated before we try to optimize this zext.
-  if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
+  if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()))
     return nullptr;
 
   // If one of the common conversion will work, do it.
-  if (Instruction *Result = commonCastTransforms(CI))
+  if (Instruction *Result = commonCastTransforms(Zext))
     return Result;
 
-  Value *Src = CI.getOperand(0);
-  Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+  Value *Src = Zext.getOperand(0);
+  Type *SrcTy = Src->getType(), *DestTy = Zext.getType();
 
   // Try to extend the entire expression tree to the wide destination type.
   unsigned BitsToClear;
   if (shouldChangeType(SrcTy, DestTy) &&
-      canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) {
+      canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &Zext)) {
     assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
            "Can't clear more bits than in SrcTy");
 
@@ -1259,25 +1247,25 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
     LLVM_DEBUG(
         dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                   " to avoid zero extend: "
-               << CI << '\n');
+               << Zext << '\n');
     Value *Res = EvaluateInDifferentType(Src, DestTy, false);
     assert(Res->getType() == DestTy);
 
     // Preserve debug values referring to Src if the zext is its last use.
     if (auto *SrcOp = dyn_cast<Instruction>(Src))
       if (SrcOp->hasOneUse())
-        replaceAllDbgUsesWith(*SrcOp, *Res, CI, DT);
+        replaceAllDbgUsesWith(*SrcOp, *Res, Zext, DT);
 
-    uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear;
+    uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
     uint32_t DestBitSize = DestTy->getScalarSizeInBits();
 
     // If the high bits are already filled with zeros, just replace this
     // cast with the result.
     if (MaskedValueIsZero(Res,
                           APInt::getHighBitsSet(DestBitSize,
-                                                DestBitSize-SrcBitsKept),
-                          0, &CI))
-      return replaceInstUsesWith(CI, Res);
+                                                DestBitSize - SrcBitsKept),
+                          0, &Zext))
+      return replaceInstUsesWith(Zext, Res);
 
     // We need to emit an AND to clear the high bits.
     Constant *C = ConstantInt::get(Res->getType(),
@@ -1288,7 +1276,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
   // If this is a TRUNC followed by a ZEXT then we are dealing with integral
   // types and if the sizes are just right we can convert this into a logical
   // 'and' which will be much cheaper than the pair of casts.
-  if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) {   // A->B->C cast
+  if (auto *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
     // TODO: Subsume this into EvaluateInDifferentType.
 
     // Get the sizes of the types involved. We know that the intermediate type
@@ -1296,7 +1284,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
     Value *A = CSrc->getOperand(0);
     unsigned SrcSize = A->getType()->getScalarSizeInBits();
     unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
-    unsigned DstSize = CI.getType()->getScalarSizeInBits();
+    unsigned DstSize = DestTy->getScalarSizeInBits();
     // If we're actually extending zero bits, then if
     // SrcSize <  DstSize: zext(a & mask)
     // SrcSize == DstSize: a & mask
@@ -1305,7 +1293,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
       APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
       Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
       Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
-      return new ZExtInst(And, CI.getType());
+      return new ZExtInst(And, DestTy);
     }
 
     if (SrcSize == DstSize) {
@@ -1314,7 +1302,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
                                                            AndValue));
     }
     if (SrcSize > DstSize) {
-      Value *Trunc = Builder.CreateTrunc(A, CI.getType());
+      Value *Trunc = Builder.CreateTrunc(A, DestTy);
       APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
       return BinaryOperator::CreateAnd(Trunc,
                                        ConstantInt::get(Trunc->getType(),
@@ -1322,34 +1310,46 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
     }
   }
 
-  if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Src))
-    return transformZExtICmp(Cmp, CI);
+  if (auto *Cmp = dyn_cast<ICmpInst>(Src))
+    return transformZExtICmp(Cmp, Zext);
 
   // zext(trunc(X) & C) -> (X & zext(C)).
   Constant *C;
   Value *X;
   if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
-      X->getType() == CI.getType())
-    return BinaryOperator::CreateAnd(X, ConstantExpr::getZExt(C, CI.getType()));
+      X->getType() == DestTy)
+    return BinaryOperator::CreateAnd(X, ConstantExpr::getZExt(C, DestTy));
 
   // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
   Value *And;
   if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
       match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
-      X->getType() == CI.getType()) {
-    Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
+      X->getType() == DestTy) {
+    Constant *ZC = ConstantExpr::getZExt(C, DestTy);
     return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
   }
 
+  // If we are truncating, masking, and then zexting back to the original type,
+  // that's just a mask. This is not handled by canEvaluateZextd if the
+  // intermediate values have extra uses. This could be generalized further for
+  // a non-constant mask operand.
+  // zext (and (trunc X), C) --> and X, (zext C)
+  if (match(Src, m_And(m_Trunc(m_Value(X)), m_Constant(C))) &&
+      X->getType() == DestTy) {
+    Constant *ZextC = ConstantExpr::getZExt(C, DestTy);
+    return BinaryOperator::CreateAnd(X, ZextC);
+  }
+
   if (match(Src, m_VScale(DL))) {
-    if (CI.getFunction() &&
-        CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
-      Attribute Attr = CI.getFunction()->getFnAttribute(Attribute::VScaleRange);
-      if (Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
+    if (Zext.getFunction() &&
+        Zext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
+      Attribute Attr =
+          Zext.getFunction()->getFnAttribute(Attribute::VScaleRange);
+      if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
         unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
         if (Log2_32(*MaxVScale) < TypeWidth) {
           Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
-          return replaceInstUsesWith(CI, VScale);
+          return replaceInstUsesWith(Zext, VScale);
         }
       }
     }
@@ -1359,48 +1359,44 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
 }
 
 /// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
-Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *ICI,
-                                                 Instruction &CI) {
-  Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1);
-  ICmpInst::Predicate Pred = ICI->getPredicate();
+Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
+                                                 SExtInst &Sext) {
+  Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
+  ICmpInst::Predicate Pred = Cmp->getPredicate();
 
   // Don't bother if Op1 isn't of vector or integer type.
   if (!Op1->getType()->isIntOrIntVectorTy())
     return nullptr;
 
-  if ((Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) ||
-      (Pred == ICmpInst::ICMP_SGT && match(Op1, m_AllOnes()))) {
-    // (x <s  0) ? -1 : 0 -> ashr x, 31        -> all ones if negative
-    // (x >s -1) ? -1 : 0 -> not (ashr x, 31)  -> all ones if positive
+  if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) {
+    // sext (x <s 0) --> ashr x, 31 (all ones if negative)
     Value *Sh = ConstantInt::get(Op0->getType(),
                                  Op0->getType()->getScalarSizeInBits() - 1);
     Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
-    if (In->getType() != CI.getType())
-      In = Builder.CreateIntCast(In, CI.getType(), true /*SExt*/);
+    if (In->getType() != Sext.getType())
+      In = Builder.CreateIntCast(In, Sext.getType(), true /*SExt*/);
 
-    if (Pred == ICmpInst::ICMP_SGT)
-      In = Builder.CreateNot(In, In->getName() + ".not");
-    return replaceInstUsesWith(CI, In);
+    return replaceInstUsesWith(Sext, In);
  }
 
   if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
     // If we know that only one bit of the LHS of the icmp can be set and we
     // have an equality comparison with zero or a power of 2, we can transform
     // the icmp and sext into bitwise/integer operations.
-    if (ICI->hasOneUse() &&
-        ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
-      KnownBits Known = computeKnownBits(Op0, 0, &CI);
+    if (Cmp->hasOneUse() &&
+        Cmp->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
+      KnownBits Known = computeKnownBits(Op0, 0, &Sext);
 
       APInt KnownZeroMask(~Known.Zero);
       if (KnownZeroMask.isPowerOf2()) {
-        Value *In = ICI->getOperand(0);
+        Value *In = Cmp->getOperand(0);
 
         // If the icmp tests for a known zero bit we can constant fold it.
         if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
           Value *V = Pred == ICmpInst::ICMP_NE ?
-                       ConstantInt::getAllOnesValue(CI.getType()) :
-                       ConstantInt::getNullValue(CI.getType());
-          return replaceInstUsesWith(CI, V);
+                       ConstantInt::getAllOnesValue(Sext.getType()) :
+                       ConstantInt::getNullValue(Sext.getType());
+          return replaceInstUsesWith(Sext, V);
         }
 
         if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
@@ -1431,9 +1427,9 @@ Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *ICI,
                                    KnownZeroMask.getBitWidth() - 1), "sext");
         }
 
-        if (CI.getType() == In->getType())
-          return replaceInstUsesWith(CI, In);
-        return CastInst::CreateIntegerCast(In, CI.getType(), true/*SExt*/);
+        if (Sext.getType() == In->getType())
+          return replaceInstUsesWith(Sext, In);
+        return CastInst::CreateIntegerCast(In, Sext.getType(), true/*SExt*/);
       }
     }
   }
@@ -1496,22 +1492,22 @@ static bool canEvaluateSExtd(Value *V, Type *Ty) {
   return false;
 }
 
-Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
+Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
   // If this sign extend is only used by a truncate, let the truncate be
   // eliminated before we try to optimize this sext.
-  if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
+  if (Sext.hasOneUse() && isa<TruncInst>(Sext.user_back()))
     return nullptr;
 
-  if (Instruction *I = commonCastTransforms(CI))
+  if (Instruction *I = commonCastTransforms(Sext))
     return I;
 
-  Value *Src = CI.getOperand(0);
-  Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+  Value *Src = Sext.getOperand(0);
+  Type *SrcTy = Src->getType(), *DestTy = Sext.getType();
   unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
   unsigned DestBitSize = DestTy->getScalarSizeInBits();
 
   // If the value being extended is zero or positive, use a zext instead.
-  if (isKnownNonNegative(Src, DL, 0, &AC, &CI, &DT))
+  if (isKnownNonNegative(Src, DL, 0, &AC, &Sext, &DT))
     return CastInst::Create(Instruction::ZExt, Src, DestTy);
 
   // Try to extend the entire expression tree to the wide destination type.
@@ -1520,14 +1516,14 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
     LLVM_DEBUG(
         dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                   " to avoid sign extend: "
-               << CI << '\n');
+               << Sext << '\n');
     Value *Res = EvaluateInDifferentType(Src, DestTy, true);
     assert(Res->getType() == DestTy);
 
     // If the high bits are already filled with sign bit, just replace this
     // cast with the result.
-    if (ComputeNumSignBits(Res, 0, &CI) > DestBitSize - SrcBitSize)
-      return replaceInstUsesWith(CI, Res);
+    if (ComputeNumSignBits(Res, 0, &Sext) > DestBitSize - SrcBitSize)
+      return replaceInstUsesWith(Sext, Res);
 
     // We need to emit a shl + ashr to do the sign extend.
     Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
@@ -1540,7 +1536,7 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
     // If the input has more sign bits than bits truncated, then convert
     // directly to final type.
     unsigned XBitSize = X->getType()->getScalarSizeInBits();
-    if (ComputeNumSignBits(X, 0, &CI) > XBitSize - SrcBitSize)
+    if (ComputeNumSignBits(X, 0, &Sext) > XBitSize - SrcBitSize)
       return CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
 
     // If input is a trunc from the destination type, then convert into shifts.
@@ -1563,8 +1559,8 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
     }
   }
 
-  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
-    return transformSExtICmp(ICI, CI);
+  if (auto *Cmp = dyn_cast<ICmpInst>(Src))
+    return transformSExtICmp(Cmp, Sext);
 
   // If the input is a shl/ashr pair of a same constant, then this is a sign
   // extension from a smaller value. If we could trust arbitrary bitwidth
@@ -1593,7 +1589,7 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
                                                 NumLowbitsLeft);
     NewShAmt =
         Constant::mergeUndefsWith(Constant::mergeUndefsWith(NewShAmt, BA), CA);
-    A = Builder.CreateShl(A, NewShAmt, CI.getName());
+    A = Builder.CreateShl(A, NewShAmt, Sext.getName());
     return BinaryOperator::CreateAShr(A, NewShAmt);
   }
 
@@ -1616,13 +1612,14 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
   }
 
   if (match(Src, m_VScale(DL))) {
-    if (CI.getFunction() &&
-        CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
-      Attribute Attr = CI.getFunction()->getFnAttribute(Attribute::VScaleRange);
-      if (Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
+    if (Sext.getFunction() &&
+        Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
+      Attribute Attr =
+          Sext.getFunction()->getFnAttribute(Attribute::VScaleRange);
+      if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
         if (Log2_32(*MaxVScale) < (SrcBitSize - 1)) {
           Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
-          return replaceInstUsesWith(CI, VScale);
+          return replaceInstUsesWith(Sext, VScale);
         }
       }
     }
@@ -1659,7 +1656,6 @@ static Type *shrinkFPConstant(ConstantFP *CFP) {
 
 // Determine if this is a vector of ConstantFPs and if so, return the minimal
 // type we can safely truncate all elements to.
-// TODO: Make these support undef elements.
 static Type *shrinkFPConstantVector(Value *V) {
   auto *CV = dyn_cast<Constant>(V);
   auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
@@ -1673,6 +1669,9 @@ static Type *shrinkFPConstantVector(Value *V) {
   // For fixed-width vectors we find the minimal type by looking
   // through the constant values of the vector.
   for (unsigned i = 0; i != NumElts; ++i) {
+    if (isa<UndefValue>(CV->getAggregateElement(i)))
+      continue;
+
     auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
     if (!CFP)
       return nullptr;
@@ -1688,7 +1687,7 @@ static Type *shrinkFPConstantVector(Value *V) {
   }
 
   // Make a vector type from the minimal type.
-  return FixedVectorType::get(MinType, NumElts);
+  return MinType ? FixedVectorType::get(MinType, NumElts) : nullptr;
 }
 
 /// Find the minimum FP type we can safely truncate to.
@@ -2862,21 +2861,27 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
     }
   }
 
-  // A bitcasted-to-scalar and byte-reversing shuffle is better recognized as
-  // a byte-swap:
-  // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) --> bswap (bitcast X)
-  // TODO: We should match the related pattern for bitreverse.
-  if (DestTy->isIntegerTy() &&
-      DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
-      SrcTy->getScalarSizeInBits() == 8 &&
-      ShufElts.getKnownMinValue() % 2 == 0 && Shuf->hasOneUse() &&
-      Shuf->isReverse()) {
-    assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
-    assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
-    Function *Bswap =
-        Intrinsic::getDeclaration(CI.getModule(), Intrinsic::bswap, DestTy);
-    Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
-    return CallInst::Create(Bswap, { ScalarX });
+  // A bitcasted-to-scalar and byte/bit reversing shuffle is better recognized
+  // as a byte/bit swap:
+  // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X)
+  // bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X)
+  if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % 2 == 0 &&
+      Shuf->hasOneUse() && Shuf->isReverse()) {
+    unsigned IntrinsicNum = 0;
+    if (DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
+        SrcTy->getScalarSizeInBits() == 8) {
+      IntrinsicNum = Intrinsic::bswap;
+    } else if (SrcTy->getScalarSizeInBits() == 1) {
+      IntrinsicNum = Intrinsic::bitreverse;
+    }
+    if (IntrinsicNum != 0) {
+      assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
+      assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
+      Function *BswapOrBitreverse =
+          Intrinsic::getDeclaration(CI.getModule(), IntrinsicNum, DestTy);
+      Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
+      return CallInst::Create(BswapOrBitreverse, {ScalarX});
+    }
   }
 }
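
A worked example of the bound used by the new FPToUI/FPToSI case in canEvaluateTruncated: for IEEE single precision, semanticsMaxExponent is 127 and the largest finite value is (2 - 2^-23) * 2^127, strictly below 2^128, so any integer type wider than 127 bits holds every fptoui result, and fptosi needs one more bit for the sign. A minimal standalone C++ sketch of that rule (illustrative names, not the LLVM API):

    #include <cassert>
    #include <cstdint>

    // The largest finite value of a binary FP type is (2 - 2^-p) * 2^MaxExponent,
    // strictly less than 2^(MaxExponent + 1), so an unsigned integer holds every
    // fptoui result once it has more than MaxExponent bits; fptosi needs one
    // extra bit for the sign, mirroring the ++MinBitWidth in the diff above.
    static bool intTypeHoldsAllFPValues(uint32_t IntBits, uint32_t MaxExponent,
                                        bool IsSigned) {
      uint32_t MinBitWidth = MaxExponent;
      if (IsSigned)
        ++MinBitWidth;
      return IntBits > MinBitWidth;
    }

    int main() {
      // IEEE single: MaxExponent = 127, FLT_MAX < 2^128.
      assert(intTypeHoldsAllFPValues(128, 127, /*IsSigned=*/false)); // fptoui to i128: safe
      assert(!intTypeHoldsAllFPValues(128, 127, /*IsSigned=*/true)); // fptosi to i128: may overflow
      assert(intTypeHoldsAllFPValues(129, 127, /*IsSigned=*/true));  // fptosi to i129: safe
      return 0;
    }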
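The zext (X == 0) / (X != 0) comments in transformZExtICmp describe folds on a value with exactly one possibly-set bit. A standalone sketch, assuming (as computeKnownBits would have to establish) that X can only have bit 1 set:

    #include <cassert>
    #include <cstdint>

    int main() {
      // X is 0 or 2: only bit 1 can possibly be set.
      for (uint32_t X : {0u, 2u}) {
        uint32_t ZextNe = (X != 0) ? 1u : 0u; // zext (X != 0) to i32
        uint32_t ZextEq = (X == 0) ? 1u : 0u; // zext (X == 0) to i32
        assert(ZextNe == (X >> 1));           // --> X >> 1
        assert(ZextEq == ((X >> 1) ^ 1u));    // --> (X >> 1) ^ 1
      }
      return 0;
    }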
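Likewise, the sext fold kept in transformSExtICmp rewrites sext (icmp slt X, 0) as an arithmetic shift by the bit width minus one. A standalone i32 check (C++20 defines >> on a negative signed value as arithmetic, matching IR ashr):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t X : {INT32_MIN, -7, 0, 42, INT32_MAX}) {
        int32_t Sext = (X < 0) ? -1 : 0; // sext (icmp slt X, 0) to i32
        assert(Sext == (X >> 31));       // --> ashr X, 31
      }
      return 0;
    }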
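Finally, the new bitcast fold relies on a byte-reversing shuffle commuting with the vector-to-scalar bitcast. A standalone check of that equivalence for <4 x i8> -> i32 (independent of host endianness, since reversing bytes in memory always reverses their significance):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // bitcast i32 -> <4 x i8>, shufflevector mask <3,2,1,0>, bitcast back.
    static uint32_t viaReversedShuffle(uint32_t X) {
      uint8_t B[4];
      std::memcpy(B, &X, 4);
      uint8_t R[4] = {B[3], B[2], B[1], B[0]};
      uint32_t Out;
      std::memcpy(&Out, R, 4);
      return Out;
    }

    static uint32_t bswap32(uint32_t X) { // llvm.bswap.i32 semantics
      return (X >> 24) | ((X >> 8) & 0xFF00u) | ((X << 8) & 0xFF0000u) | (X << 24);
    }

    int main() {
      assert(viaReversedShuffle(0x11223344u) == bswap32(0x11223344u));
      return 0;
    }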