Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp')
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 383
1 file changed, 317 insertions(+), 66 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 05b28328afbf..67ef2e895b6c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -15,21 +15,18 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
@@ -74,7 +71,6 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
-#include <cstring>
#include <utility>
#include <vector>
@@ -108,6 +104,19 @@ static Type *getPromotedType(Type *Ty) {
return Ty;
}
+/// Recognize a memcpy/memmove from an alloca that is otherwise trivially unused.
+/// TODO: This should probably be integrated with visitAllocSites, but that
+/// requires a deeper change to allow either unread or unwritten objects.
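+/// For example (illustrative IR), a copy whose source is a fresh alloca that
+/// is only used by this transfer reads uninitialized memory:
+///   %a = alloca [16 x i8]
+///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %a, i64 16, i1 false)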
+static bool hasUndefSource(AnyMemTransferInst *MI) {
+ auto *Src = MI->getRawSource();
+ while (isa<GetElementPtrInst>(Src) || isa<BitCastInst>(Src)) {
+ if (!Src->hasOneUse())
+ return false;
+ Src = cast<Instruction>(Src)->getOperand(0);
+ }
+ return isa<AllocaInst>(Src) && Src->hasOneUse();
+}
+
Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
MaybeAlign CopyDstAlign = MI->getDestAlign();
@@ -132,6 +141,14 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
return MI;
}
+ // If the source is provably undef, the memcpy/memmove doesn't do anything
+ // (unless the transfer is volatile).
+ if (hasUndefSource(MI) && !MI->isVolatile()) {
+ // Set the size of the copy to 0; it will be deleted on the next iteration.
+ MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
+ return MI;
+ }
+
// If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
// load/store.
ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
@@ -241,6 +258,15 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
return MI;
}
+ // Remove memset with an undef value.
+ // FIXME: This is technically incorrect because it might overwrite a poison
+ // value. Change to PoisonValue once #52930 is resolved.
+ if (isa<UndefValue>(MI->getValue())) {
+ // Set the size of the copy to 0; it will be deleted on the next iteration.
+ MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
+ return MI;
+ }
+
// Extract the length and alignment and fill if they are constant.
ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
@@ -248,7 +274,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
return nullptr;
const uint64_t Len = LenC->getLimitedValue();
assert(Len && "0-sized memory setting should be removed already.");
- const Align Alignment = assumeAligned(MI->getDestAlignment());
+ const Align Alignment = MI->getDestAlign().valueOrOne();
// If it is an atomic and alignment is less than the size then we will
// introduce the unaligned memory access which will be later transformed
@@ -769,7 +795,7 @@ static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
/// \p Result and a constant \p Overflow value.
static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result,
Constant *Overflow) {
- Constant *V[] = {UndefValue::get(Result->getType()), Overflow};
+ Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
StructType *ST = cast<StructType>(II->getType());
Constant *Struct = ConstantStruct::get(ST, V);
return InsertValueInst::Create(Struct, Result, 0);
@@ -795,6 +821,10 @@ static Optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
if (Known.isNegative())
return true;
+ Value *X, *Y;
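+ // An nsw (X - Y) cannot overflow, so it is negative exactly when X s< Y;
+ // a dominating condition on that compare determines the sign.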
+ if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
+ return isImpliedByDomCondition(ICmpInst::ICMP_SLT, X, Y, CxtI, DL);
+
return isImpliedByDomCondition(
ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
}
@@ -837,6 +867,67 @@ static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
: BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
}
+/// Match a sadd_sat or ssub_sat that uses min/max to clamp the value.
+Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
+ Type *Ty = MinMax1.getType();
+
+ // We are looking for a tree of:
+ // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
+ // where the min and max could be reversed.
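+ // For example (illustrative), saturating an i8 addition performed in i32:
+ //   smax(smin(add(sext(A), sext(B)), 127), -128)
+ //     --> sext(sadd.sat(A, B))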
+ Instruction *MinMax2;
+ BinaryOperator *AddSub;
+ const APInt *MinValue, *MaxValue;
+ if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
+ if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
+ return nullptr;
+ } else if (match(&MinMax1,
+ m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
+ if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
+ return nullptr;
+ } else
+ return nullptr;
+
+ // Check that the constants form a saturating clamp, and that the new type
+ // would be sensible to convert to.
+ if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
+ return nullptr;
+ // In what bitwidth can this be treated as saturating arithmetic?
+ unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
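+ // e.g. *MaxValue == 127 and *MinValue == -128 clamp to i8:
+ //   NewBitWidth = log2(128) + 1 = 8.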
+ // FIXME: This isn't quite right for vectors, but using the scalar type is a
+ // good first approximation for what should be done there.
+ if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
+ return nullptr;
+
+ // Also make sure that the inner min/max and the add/sub have one use.
+ if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
+ return nullptr;
+
+ // Create the new type (which can be a vector type)
+ Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
+
+ Intrinsic::ID IntrinsicID;
+ if (AddSub->getOpcode() == Instruction::Add)
+ IntrinsicID = Intrinsic::sadd_sat;
+ else if (AddSub->getOpcode() == Instruction::Sub)
+ IntrinsicID = Intrinsic::ssub_sat;
+ else
+ return nullptr;
+
+ // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
+ // is usually achieved via a sext from a smaller type.
+ if (ComputeMaxSignificantBits(AddSub->getOperand(0), 0, AddSub) >
+ NewBitWidth ||
+ ComputeMaxSignificantBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth)
+ return nullptr;
+
+ // Finally create and return the sat intrinsic, truncated to the new type
+ Function *F =
+     Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy);
+ Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
+ Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
+ Value *Sat = Builder.CreateCall(F, {AT, BT});
+ return CastInst::Create(Instruction::SExt, Sat, Ty);
+}
+
/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
/// can only be one of two possible constant values -- turn that into a select
@@ -879,6 +970,59 @@ static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
}
+/// If this min/max has a constant operand and an operand that is a matching
+/// min/max with a constant operand, constant-fold the 2 constant operands.
+static Instruction *reassociateMinMaxWithConstants(IntrinsicInst *II) {
+ Intrinsic::ID MinMaxID = II->getIntrinsicID();
+ auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
+ if (!LHS || LHS->getIntrinsicID() != MinMaxID)
+ return nullptr;
+
+ Constant *C0, *C1;
+ if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
+ !match(II->getArgOperand(1), m_ImmConstant(C1)))
+ return nullptr;
+
+ // max (max X, C0), C1 --> max X, (max C0, C1) --> max X, NewC
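+ // e.g. smax (smax X, 3), 7: CondC = (3 s> 7) = false, so NewC = 7 and the
+ // result is smax X, 7.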
+ ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(MinMaxID);
+ Constant *CondC = ConstantExpr::getICmp(Pred, C0, C1);
+ Constant *NewC = ConstantExpr::getSelect(CondC, C0, C1);
+
+ Module *Mod = II->getModule();
+ Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType());
+ return CallInst::Create(MinMax, {LHS->getArgOperand(0), NewC});
+}
+
+/// If this min/max has a matching min/max operand with a constant, try to push
+/// the constant operand into this instruction. This can enable more folds.
+static Instruction *
+reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
+ InstCombiner::BuilderTy &Builder) {
+ // Match and capture a min/max operand candidate.
+ Value *X, *Y;
+ Constant *C;
+ Instruction *Inner;
+ if (!match(II, m_c_MaxOrMin(m_OneUse(m_CombineAnd(
+ m_Instruction(Inner),
+ m_MaxOrMin(m_Value(X), m_ImmConstant(C)))),
+ m_Value(Y))))
+ return nullptr;
+
+ // The inner op must match. Check for constants to avoid infinite loops.
+ Intrinsic::ID MinMaxID = II->getIntrinsicID();
+ auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
+ if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
+ match(X, m_ImmConstant()) || match(Y, m_ImmConstant()))
+ return nullptr;
+
+ // max (max X, C), Y --> max (max X, Y), C
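+ // e.g. umin (umin X, 42), Y --> umin (umin X, Y), 42, which re-exposes the
+ // constant at the top of the tree where other folds can see it.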
+ Function *MinMax =
+ Intrinsic::getDeclaration(II->getModule(), MinMaxID, II->getType());
+ Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
+ NewInner->takeName(Inner);
+ return CallInst::Create(MinMax, {NewInner, C});
+}
+
/// Reduce a sequence of min/max intrinsics with a common operand.
static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
// Match 3 of the same min/max ops. Example: umin(umin(), umin()).
@@ -936,6 +1080,56 @@ static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
}
+/// If all arguments of the intrinsic are unary shuffles with the same mask,
+/// try to shuffle after the intrinsic.
+static Instruction *
+foldShuffledIntrinsicOperands(IntrinsicInst *II,
+ InstCombiner::BuilderTy &Builder) {
+ // TODO: This should be extended to handle other intrinsics like fshl, ctpop,
+ // etc. Use llvm::isTriviallyVectorizable() and related to determine
+ // which intrinsics are safe to shuffle?
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::smax:
+ case Intrinsic::smin:
+ case Intrinsic::umax:
+ case Intrinsic::umin:
+ case Intrinsic::fma:
+ case Intrinsic::fshl:
+ case Intrinsic::fshr:
+ break;
+ default:
+ return nullptr;
+ }
+
+ Value *X;
+ ArrayRef<int> Mask;
+ if (!match(II->getArgOperand(0),
+ m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
+ return nullptr;
+
+ // At least 1 operand must have 1 use because we are creating 2 instructions.
+ if (none_of(II->args(), [](Value *V) { return V->hasOneUse(); }))
+ return nullptr;
+
+ // See if all arguments are shuffled with the same mask.
+ SmallVector<Value *, 4> NewArgs(II->arg_size());
+ NewArgs[0] = X;
+ Type *SrcTy = X->getType();
+ for (unsigned i = 1, e = II->arg_size(); i != e; ++i) {
+ if (!match(II->getArgOperand(i),
+ m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) ||
+ X->getType() != SrcTy)
+ return nullptr;
+ NewArgs[i] = X;
+ }
+
+ // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
+ Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
+ Value *NewIntrinsic =
+ Builder.CreateIntrinsic(II->getIntrinsicID(), SrcTy, NewArgs, FPI);
+ return new ShuffleVectorInst(NewIntrinsic, Mask);
+}
+
/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
@@ -943,14 +1137,14 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// Don't try to simplify calls without uses. It will not do anything useful,
// but will result in the following folds being skipped.
if (!CI.use_empty())
- if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI)))
+ if (Value *V = simplifyCall(&CI, SQ.getWithInstruction(&CI)))
return replaceInstUsesWith(CI, V);
if (isFreeCall(&CI, &TLI))
return visitFree(CI);
- // If the caller function is nounwind, mark the call as nounwind, even if the
- // callee isn't.
+ // If the caller function (i.e. the function that contains this CallInst)
+ // is nounwind, mark the call as nounwind, even if the callee isn't.
if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
CI.setDoesNotThrow();
return &CI;
@@ -980,13 +1174,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
if (NumBytes->isNullValue())
return eraseInstFromFunction(CI);
-
- if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
- if (CI->getZExtValue() == 1) {
- // Replace the instruction with just byte operations. We would
- // transform other cases to loads/stores, but we don't know if
- // alignment is sufficient.
- }
}
// No other transformations apply to volatile transfers.
@@ -1050,10 +1237,19 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
return NewCall;
}
+ // Unused constrained FP intrinsic calls may have a declared side effect,
+ // which prevents them from being removed. In some cases, however, the side
+ // effect is actually absent. To detect this, call simplifyConstrainedFPCall;
+ // if it returns a replacement, the call can be removed.
+ if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(CI)) {
+ if (simplifyConstrainedFPCall(&CI, SQ.getWithInstruction(&CI)))
+ return eraseInstFromFunction(CI);
+ }
+
Intrinsic::ID IID = II->getIntrinsicID();
switch (IID) {
case Intrinsic::objectsize:
- if (Value *V = lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
+ if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false))
return replaceInstUsesWith(CI, V);
return nullptr;
case Intrinsic::abs: {
@@ -1224,6 +1420,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Instruction *R = FoldOpIntoSelect(*II, Sel))
return R;
+ if (Instruction *NewMinMax = reassociateMinMaxWithConstants(II))
+ return NewMinMax;
+
+ if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder))
+ return R;
+
if (Instruction *NewMinMax = factorizeMinMaxTree(II))
return NewMinMax;
@@ -1231,14 +1433,35 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
case Intrinsic::bswap: {
Value *IIOperand = II->getArgOperand(0);
- Value *X = nullptr;
+
+ // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
+ // inverse-shift-of-bswap:
+ // bswap (shl X, Y) --> lshr (bswap X), Y
+ // bswap (lshr X, Y) --> shl (bswap X), Y
+ Value *X, *Y;
+ if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
+ // The transform allows undef vector elements, so try a constant match
+ // first. If knownbits can handle that case, that clause could be removed.
+ unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
+ const APInt *C;
+ if ((match(Y, m_APIntAllowUndef(C)) && (*C & 7) == 0) ||
+ MaskedValueIsZero(Y, APInt::getLowBitsSet(BitWidth, 3))) {
+ Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
+ BinaryOperator::BinaryOps InverseShift =
+ cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
+ ? Instruction::LShr
+ : Instruction::Shl;
+ return BinaryOperator::Create(InverseShift, NewSwap, Y);
+ }
+ }
KnownBits Known = computeKnownBits(IIOperand, 0, II);
uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
+ unsigned BW = Known.getBitWidth();
// bswap(x) -> shift(x) if x has exactly one "active byte"
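+ // e.g. an i32 known to be 0x0000??00 has LZ = 16 and TZ = 8, so
+ // bswap(x) == shl(x, 8).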
- if (Known.getBitWidth() - LZ - TZ == 8) {
+ if (BW - LZ - TZ == 8) {
assert(LZ != TZ && "active byte cannot be in the middle");
if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
return BinaryOperator::CreateNUWShl(
@@ -1250,8 +1473,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
- unsigned C = X->getType()->getScalarSizeInBits() -
- IIOperand->getType()->getScalarSizeInBits();
+ unsigned C = X->getType()->getScalarSizeInBits() - BW;
Value *CV = ConstantInt::get(X->getType(), C);
Value *V = Builder.CreateLShr(X, CV);
return new TruncInst(V, IIOperand->getType());
@@ -1618,7 +1840,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
// Try to simplify the underlying FMul.
- if (Value *V = SimplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
+ if (Value *V = simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
II->getFastMathFlags(),
SQ.getWithInstruction(II))) {
auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
@@ -1649,7 +1871,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// Try to simplify the underlying FMul. We can only apply simplifications
// that do not require rounding.
- if (Value *V = SimplifyFMAFMul(II->getArgOperand(0), II->getArgOperand(1),
+ if (Value *V = simplifyFMAFMul(II->getArgOperand(0), II->getArgOperand(1),
II->getFastMathFlags(),
SQ.getWithInstruction(II))) {
auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
@@ -2135,7 +2357,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
break;
}
- case Intrinsic::experimental_vector_insert: {
+ case Intrinsic::vector_insert: {
Value *Vec = II->getArgOperand(0);
Value *SubVec = II->getArgOperand(1);
Value *Idx = II->getArgOperand(2);
@@ -2181,7 +2403,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
break;
}
- case Intrinsic::experimental_vector_extract: {
+ case Intrinsic::vector_extract: {
Value *Vec = II->getArgOperand(0);
Value *Idx = II->getArgOperand(1);
@@ -2456,11 +2678,15 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
default: {
// Handle target specific intrinsics
Optional<Instruction *> V = targetInstCombineIntrinsic(*II);
- if (V.hasValue())
+ if (V)
return V.getValue();
break;
}
}
+
+ if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
+ return Shuf;
+
// Some intrinsics (like experimental_gc_statepoint) can be used in invoke
// context, so it is handled in visitCallBase and we should trigger it.
return visitCallBase(*II);
@@ -2648,47 +2874,56 @@ static IntrinsicInst *findInitTrampoline(Value *Callee) {
return nullptr;
}
-void InstCombinerImpl::annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI) {
+bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
+ const TargetLibraryInfo *TLI) {
// Note: We only handle cases which can't be driven from generic attributes
// here. So, for example, nonnull and noalias (which are common properties
// of some allocation functions) are expected to be handled via annotation
// of the respective allocator declaration with generic attributes.
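+ // For example, a call to malloc(16) would get dereferenceable_or_null(16)
+ // on its return value (or dereferenceable(16) if it is already nonnull).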
+ bool Changed = false;
- uint64_t Size;
- ObjectSizeOpts Opts;
- if (getObjectSize(&Call, Size, DL, TLI, Opts) && Size > 0) {
- // TODO: We really should just emit deref_or_null here and then
- // let the generic inference code combine that with nonnull.
- if (Call.hasRetAttr(Attribute::NonNull))
- Call.addRetAttr(Attribute::getWithDereferenceableBytes(
- Call.getContext(), Size));
- else
- Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
- Call.getContext(), Size));
+ if (isAllocationFn(&Call, TLI)) {
+ uint64_t Size;
+ ObjectSizeOpts Opts;
+ if (getObjectSize(&Call, Size, DL, TLI, Opts) && Size > 0) {
+ // TODO: We really should just emit deref_or_null here and then
+ // let the generic inference code combine that with nonnull.
+ if (Call.hasRetAttr(Attribute::NonNull)) {
+ Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
+ Call.addRetAttr(
+ Attribute::getWithDereferenceableBytes(Call.getContext(), Size));
+ } else {
+ Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
+ Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), Size));
+ }
+ }
}
// Add alignment attribute if alignment is a power of two constant.
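+ // For example, a call to aligned_alloc(32, n) would get align(32) on its
+ // return value, provided that improves on any existing alignment.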
Value *Alignment = getAllocAlignment(&Call, TLI);
if (!Alignment)
- return;
+ return Changed;
ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
uint64_t AlignmentVal = AlignOpC->getZExtValue();
if (llvm::isPowerOf2_64(AlignmentVal)) {
- Call.removeRetAttr(Attribute::Alignment);
- Call.addRetAttr(Attribute::getWithAlignment(Call.getContext(),
- Align(AlignmentVal)));
+ Align ExistingAlign = Call.getRetAlign().valueOrOne();
+ Align NewAlign = Align(AlignmentVal);
+ if (NewAlign > ExistingAlign) {
+ Call.addRetAttr(
+ Attribute::getWithAlignment(Call.getContext(), NewAlign));
+ Changed = true;
+ }
}
}
+ return Changed;
}
/// Improvements for call, callbr and invoke instructions.
Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
- if (isAllocationFn(&Call, &TLI))
- annotateAnyAllocSite(Call, &TLI);
-
- bool Changed = false;
+ bool Changed = annotateAnyAllocSite(Call, &TLI);
// Mark any parameters that are known to be non-null with the nonnull
// attribute. This is helpful for inlining calls to functions with null
@@ -2718,10 +2953,12 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
// If the callee is a pointer to a function, attempt to move any casts to the
// arguments of the call/callbr/invoke.
Value *Callee = Call.getCalledOperand();
- if (!isa<Function>(Callee) && transformConstExprCastCall(Call))
+ Function *CalleeF = dyn_cast<Function>(Callee);
+ if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
+ transformConstExprCastCall(Call))
return nullptr;
- if (Function *CalleeF = dyn_cast<Function>(Callee)) {
+ if (CalleeF) {
// Remove the convergent attr on calls when the callee is not convergent.
if (Call.isConvergent() && !CalleeF->isConvergent() &&
!CalleeF->isIntrinsic()) {
@@ -2905,7 +3142,7 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
Optional<OperandBundleUse> Bundle =
GCSP.getOperandBundle(LLVMContext::OB_gc_live);
unsigned NumOfGCLives = LiveGcValues.size();
- if (!Bundle.hasValue() || NumOfGCLives == Bundle->Inputs.size())
+ if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
break;
// We can reduce the size of gc live bundle.
DenseMap<Value *, unsigned> Val2Idx;
@@ -3026,8 +3263,7 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
//
// Similarly, avoid folding away bitcasts of byval calls.
if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
- Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated) ||
- Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
+ Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
return false;
auto AI = Call.arg_begin();
@@ -3038,12 +3274,15 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
return false; // Cannot transform this parameter value.
+ // Check if there are any incompatible attributes we cannot drop safely.
if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
- .overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
+ .overlaps(AttributeFuncs::typeIncompatible(
+ ParamTy, AttributeFuncs::ASK_UNSAFE_TO_DROP)))
return false; // Attribute not compatible with transformed value.
- if (Call.isInAllocaArgument(i))
- return false; // Cannot transform to and from inalloca.
+ if (Call.isInAllocaArgument(i) ||
+ CallerPAL.hasParamAttr(i, Attribute::Preallocated))
+ return false; // Cannot transform to and from inalloca/preallocated.
if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
return false;
@@ -3052,13 +3291,18 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
// sized type and the sized type has to have the same size as the old type.
if (ParamTy != ActTy && CallerPAL.hasParamAttr(i, Attribute::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
- if (!ParamPTy || !ParamPTy->getPointerElementType()->isSized())
+ if (!ParamPTy)
return false;
- Type *CurElTy = Call.getParamByValType(i);
- if (DL.getTypeAllocSize(CurElTy) !=
- DL.getTypeAllocSize(ParamPTy->getPointerElementType()))
- return false;
+ if (!ParamPTy->isOpaque()) {
+ Type *ParamElTy = ParamPTy->getNonOpaquePointerElementType();
+ if (!ParamElTy->isSized())
+ return false;
+
+ Type *CurElTy = Call.getParamByValType(i);
+ if (DL.getTypeAllocSize(CurElTy) != DL.getTypeAllocSize(ParamElTy))
+ return false;
+ }
}
}
@@ -3116,13 +3360,20 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
Args.push_back(NewArg);
- // Add any parameter attributes.
- if (CallerPAL.hasParamAttr(i, Attribute::ByVal)) {
- AttrBuilder AB(FT->getContext(), CallerPAL.getParamAttrs(i));
- AB.addByValAttr(NewArg->getType()->getPointerElementType());
+ // Add any parameter attributes except the ones incompatible with the new
+ // type. Note that we made sure all incompatible ones are safe to drop.
+ AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
+ ParamTy, AttributeFuncs::ASK_SAFE_TO_DROP);
+ if (CallerPAL.hasParamAttr(i, Attribute::ByVal) &&
+ !ParamTy->isOpaquePointerTy()) {
+ AttrBuilder AB(Ctx, CallerPAL.getParamAttrs(i).removeAttributes(
+ Ctx, IncompatibleAttrs));
+ AB.addByValAttr(ParamTy->getNonOpaquePointerElementType());
ArgAttrs.push_back(AttributeSet::get(Ctx, AB));
- } else
- ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
+ } else {
+ ArgAttrs.push_back(
+ CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
+ }
}
// If the function takes more arguments than the call was taking, add them