Diffstat (limited to 'llvm/lib/Analysis/ConstantFolding.cpp')
-rw-r--r--  llvm/lib/Analysis/ConstantFolding.cpp | 527
1 file changed, 420 insertions(+), 107 deletions(-)
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index b32924e6497a6..8c66decaaf58d 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
@@ -37,7 +38,9 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
@@ -115,8 +118,8 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
// to simplify things.
if (SrcEltTy->isFloatingPointTy()) {
unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
- Type *SrcIVTy =
- VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
+ auto *SrcIVTy = FixedVectorType::get(
+ IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
// Ask IR to do the conversion now that #elts line up.
C = ConstantExpr::getBitCast(C, SrcIVTy);
}
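As a side note on the mechanics this hunk relies on: bitcasting a floating-point vector through an integer vector of equal total width is a pure bit reinterpretation. A minimal host-side sketch in plain C++ (not LLVM code; the printed values assume an IEEE-754 host):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  float F[2] = {1.0f, -2.5f};
  uint32_t I[2];
  static_assert(sizeof F == sizeof I, "element counts must line up");
  std::memcpy(I, F, sizeof F);            // the moral equivalent of the bitcast
  std::printf("%08x %08x\n", I[0], I[1]); // 3f800000 c0200000 on IEEE hosts
  return 0;
}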
@@ -152,11 +155,11 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
// If the element types match, IR can fold it.
unsigned NumDstElt = DestVTy->getNumElements();
- unsigned NumSrcElt = C->getType()->getVectorNumElements();
+ unsigned NumSrcElt = cast<VectorType>(C->getType())->getNumElements();
if (NumDstElt == NumSrcElt)
return ConstantExpr::getBitCast(C, DestTy);
- Type *SrcEltTy = C->getType()->getVectorElementType();
+ Type *SrcEltTy = cast<VectorType>(C->getType())->getElementType();
Type *DstEltTy = DestVTy->getElementType();
// Otherwise, we're changing the number of elements in a vector, which
@@ -172,8 +175,8 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
if (DstEltTy->isFloatingPointTy()) {
// Fold to a vector of integers with the same size as our FP type.
unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
- Type *DestIVTy =
- VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt);
+ auto *DestIVTy = FixedVectorType::get(
+ IntegerType::get(C->getContext(), FPWidth), NumDstElt);
// Recursively handle this integer conversion, if possible.
C = FoldBitCast(C, DestIVTy, DL);
@@ -185,8 +188,8 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
// it to integer first.
if (SrcEltTy->isFloatingPointTy()) {
unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
- Type *SrcIVTy =
- VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
+ auto *SrcIVTy = FixedVectorType::get(
+ IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
// Ask IR to do the conversion now that #elts line up.
C = ConstantExpr::getBitCast(C, SrcIVTy);
// If IR wasn't able to fold it, bail out.
@@ -215,7 +218,8 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
for (unsigned j = 0; j != Ratio; ++j) {
Constant *Src = C->getAggregateElement(SrcElt++);
if (Src && isa<UndefValue>(Src))
- Src = Constant::getNullValue(C->getType()->getVectorElementType());
+ Src = Constant::getNullValue(
+ cast<VectorType>(C->getType())->getElementType());
else
Src = dyn_cast_or_null<ConstantInt>(Src);
if (!Src) // Reject constantexpr elements.
@@ -329,10 +333,25 @@ Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
const DataLayout &DL) {
do {
Type *SrcTy = C->getType();
+ uint64_t DestSize = DL.getTypeSizeInBits(DestTy);
+ uint64_t SrcSize = DL.getTypeSizeInBits(SrcTy);
+ if (SrcSize < DestSize)
+ return nullptr;
+
+ // Catch the obvious splat cases (since all-zeros can coerce non-integral
+ // pointers legally).
+ if (C->isNullValue() && !DestTy->isX86_MMXTy())
+ return Constant::getNullValue(DestTy);
+ if (C->isAllOnesValue() && !DestTy->isX86_MMXTy() &&
+ !DestTy->isPtrOrPtrVectorTy()) // Don't get ones for ptr types!
+ return Constant::getAllOnesValue(DestTy);
// If the type sizes are the same and a cast is legal, just directly
// cast the constant.
- if (DL.getTypeSizeInBits(DestTy) == DL.getTypeSizeInBits(SrcTy)) {
+ // But be careful not to coerce non-integral pointers illegally.
+ if (SrcSize == DestSize &&
+ DL.isNonIntegralPointerType(SrcTy->getScalarType()) ==
+ DL.isNonIntegralPointerType(DestTy->getScalarType())) {
Instruction::CastOps Cast = Instruction::BitCast;
// If we are going from a pointer to int or vice versa, we spell the cast
// differently.
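The splat shortcut added above works because an all-zero bit pattern is the zero value of integers, floats and vectors alike, while all-ones is only meaningful for integer-like types (which is why pointer types are excluded). A host-side sketch of the underlying fact, using plain C++ and assuming IEEE-754 rather than the Constant API:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  unsigned char Zeros[8] = {};
  double D;
  uint64_t U;
  std::memcpy(&D, Zeros, sizeof D);
  std::memcpy(&U, Zeros, sizeof U);
  assert(D == 0.0 && U == 0);         // zero bytes are the zero value

  unsigned char Ones[4];
  std::memset(Ones, 0xFF, sizeof Ones);
  uint32_t AllOnes;
  std::memcpy(&AllOnes, Ones, sizeof AllOnes);
  assert(AllOnes == UINT32_MAX);      // all-ones only makes sense for ints
  return 0;
}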
@@ -361,7 +380,7 @@ Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
Constant *ElemC;
do {
ElemC = C->getAggregateElement(Elem++);
- } while (ElemC && DL.getTypeSizeInBits(ElemC->getType()) == 0);
+ } while (ElemC && DL.getTypeSizeInBits(ElemC->getType()).isZero());
C = ElemC;
} else {
C = C->getAggregateElement(0u);
@@ -460,15 +479,18 @@ bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
isa<ConstantDataSequential>(C)) {
- Type *EltTy = C->getType()->getSequentialElementType();
+ uint64_t NumElts;
+ Type *EltTy;
+ if (auto *AT = dyn_cast<ArrayType>(C->getType())) {
+ NumElts = AT->getNumElements();
+ EltTy = AT->getElementType();
+ } else {
+ NumElts = cast<VectorType>(C->getType())->getNumElements();
+ EltTy = cast<VectorType>(C->getType())->getElementType();
+ }
uint64_t EltSize = DL.getTypeAllocSize(EltTy);
uint64_t Index = ByteOffset / EltSize;
uint64_t Offset = ByteOffset - Index * EltSize;
- uint64_t NumElts;
- if (auto *AT = dyn_cast<ArrayType>(C->getType()))
- NumElts = AT->getNumElements();
- else
- NumElts = C->getType()->getVectorNumElements();
for (; Index != NumElts; ++Index) {
if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
@@ -501,6 +523,10 @@ bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
const DataLayout &DL) {
+ // Bail out early. We do not expect to load from a scalable global variable.
+ if (isa<ScalableVectorType>(LoadTy))
+ return nullptr;
+
auto *PTy = cast<PointerType>(C->getType());
auto *IntType = dyn_cast<IntegerType>(LoadTy);
@@ -520,8 +546,8 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
else if (LoadTy->isDoubleTy())
MapTy = Type::getInt64Ty(C->getContext());
else if (LoadTy->isVectorTy()) {
- MapTy = PointerType::getIntNTy(C->getContext(),
- DL.getTypeSizeInBits(LoadTy));
+ MapTy = PointerType::getIntNTy(
+ C->getContext(), DL.getTypeSizeInBits(LoadTy).getFixedSize());
} else
return nullptr;
@@ -561,7 +587,8 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
return nullptr;
int64_t Offset = OffsetAI.getSExtValue();
- int64_t InitializerSize = DL.getTypeAllocSize(GV->getInitializer()->getType());
+ int64_t InitializerSize =
+ DL.getTypeAllocSize(GV->getInitializer()->getType()).getFixedSize();
// If we're not accessing anything in this constant, the result is undefined.
if (Offset <= -1 * static_cast<int64_t>(BytesLoaded))
@@ -734,8 +761,7 @@ Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
return Op1;
}
- Known0.Zero |= Known1.Zero;
- Known0.One &= Known1.One;
+ Known0 &= Known1;
if (Known0.isConstant())
return ConstantInt::get(Op0->getType(), Known0.getConstant());
}
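The two lines folded into operator&= here compute the known bits of an AND: a result bit is known zero if it is zero in either operand, and known one only if it is one in both. A stand-in sketch with a hypothetical Known struct (the real llvm::KnownBits uses APInt):

#include <cassert>
#include <cstdint>

struct Known {                  // hypothetical stand-in for llvm::KnownBits
  uint32_t Zero = 0, One = 0;   // bits known to be 0 / known to be 1
  Known &operator&=(const Known &R) {
    Zero |= R.Zero;             // 0 in either input -> 0 in the AND
    One &= R.One;               // 1 in the AND only if 1 in both
    return *this;
  }
  bool isConstant() const { return (Zero | One) == ~0u; }
};

int main() {
  Known A{0xFFFF0000u, 0x0000FFFFu}; // X known to be exactly 0x0000FFFF
  Known B{0xFF00FF00u, 0x00FF00FFu}; // Y known to be exactly 0x00FF00FF
  A &= B;
  assert(A.isConstant() && A.One == 0x000000FFu); // X & Y == 0x000000FF
  return 0;
}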
@@ -794,10 +820,7 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
Constant *C = ConstantExpr::getGetElementPtr(
SrcElemTy, Ops[0], NewIdxs, /*InBounds=*/false, InRangeIndex);
- if (Constant *Folded = ConstantFoldConstant(C, DL, TLI))
- C = Folded;
-
- return C;
+ return ConstantFoldConstant(C, DL, TLI);
}
/// Strip the pointer casts, but preserve the address space information.
@@ -828,7 +851,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
Type *SrcElemTy = GEP->getSourceElementType();
Type *ResElemTy = GEP->getResultElementType();
Type *ResTy = GEP->getType();
- if (!SrcElemTy->isSized())
+ if (!SrcElemTy->isSized() || isa<ScalableVectorType>(SrcElemTy))
return nullptr;
if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy,
@@ -857,9 +880,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
Res = ConstantExpr::getSub(Res, CE->getOperand(1));
Res = ConstantExpr::getIntToPtr(Res, ResTy);
- if (auto *FoldedRes = ConstantFoldConstant(Res, DL, TLI))
- Res = FoldedRes;
- return Res;
+ return ConstantFoldConstant(Res, DL, TLI);
}
}
return nullptr;
@@ -932,11 +953,11 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
// Only handle pointers to sized types, not pointers to functions.
if (!Ty->isSized())
return nullptr;
- } else if (auto *ATy = dyn_cast<SequentialType>(Ty)) {
- Ty = ATy->getElementType();
} else {
- // We've reached some non-indexable type.
- break;
+ Type *NextTy = GetElementPtrInst::getTypeAtIndex(Ty, (uint64_t)0);
+ if (!NextTy)
+ break;
+ Ty = NextTy;
}
// Determine which element of the array the offset points into.
@@ -1062,7 +1083,8 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
case Instruction::InsertElement:
return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
case Instruction::ShuffleVector:
- return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
+ return ConstantExpr::getShuffleVector(
+ Ops[0], Ops[1], cast<ShuffleVectorInst>(InstOrCE)->getShuffleMask());
}
}
@@ -1079,23 +1101,19 @@ ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,
const TargetLibraryInfo *TLI,
SmallDenseMap<Constant *, Constant *> &FoldedOps) {
if (!isa<ConstantVector>(C) && !isa<ConstantExpr>(C))
- return nullptr;
+ return const_cast<Constant *>(C);
SmallVector<Constant *, 8> Ops;
- for (const Use &NewU : C->operands()) {
- auto *NewC = cast<Constant>(&NewU);
+ for (const Use &OldU : C->operands()) {
+ Constant *OldC = cast<Constant>(&OldU);
+ Constant *NewC = OldC;
// Recursively fold the ConstantExpr's operands. If we have already folded
// a ConstantExpr, we don't have to process it again.
- if (isa<ConstantVector>(NewC) || isa<ConstantExpr>(NewC)) {
- auto It = FoldedOps.find(NewC);
+ if (isa<ConstantVector>(OldC) || isa<ConstantExpr>(OldC)) {
+ auto It = FoldedOps.find(OldC);
if (It == FoldedOps.end()) {
- if (auto *FoldedC =
- ConstantFoldConstantImpl(NewC, DL, TLI, FoldedOps)) {
- FoldedOps.insert({NewC, FoldedC});
- NewC = FoldedC;
- } else {
- FoldedOps.insert({NewC, NewC});
- }
+ NewC = ConstantFoldConstantImpl(OldC, DL, TLI, FoldedOps);
+ FoldedOps.insert({OldC, NewC});
} else {
NewC = It->second;
}
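The refactoring above changes ConstantFoldConstantImpl from "null means no change" to "always return a constant, possibly the input itself", so the cache stores one unconditional entry per operand. A shape-only sketch of that contract (hypothetical Node type and foldImpl helper, not the real API):

#include <map>

struct Node {};                              // stand-in for llvm::Constant

static Node *foldImpl(Node *C, std::map<Node *, Node *> &FoldedOps) {
  auto It = FoldedOps.find(C);
  if (It != FoldedOps.end())
    return It->second;                       // already processed
  Node *NewC = C;                            // identity when nothing folds
  // ... recursively fold operands and rebuild NewC here ...
  FoldedOps.insert({C, NewC});               // cache even the no-change case
  return NewC;                               // never null
}

int main() {
  std::map<Node *, Node *> Cache;
  Node N;
  return foldImpl(&N, Cache) == &N ? 0 : 1;  // unchanged input comes back
}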
@@ -1136,8 +1154,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
if (!C)
return nullptr;
// Fold the PHI's operands.
- if (auto *FoldedC = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps))
- C = FoldedC;
+ C = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
// If the incoming value is a different constant to
// the one we saw previously, then give up.
if (CommonValue && C != CommonValue)
@@ -1159,9 +1176,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
for (const Use &OpU : I->operands()) {
auto *Op = cast<Constant>(&OpU);
// Fold the Instruction's operands.
- if (auto *FoldedOp = ConstantFoldConstantImpl(Op, DL, TLI, FoldedOps))
- Op = FoldedOp;
-
+ Op = ConstantFoldConstantImpl(Op, DL, TLI, FoldedOps);
Ops.push_back(Op);
}
@@ -1400,41 +1415,19 @@ llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
//
bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
- if (Call->isNoBuiltin() || Call->isStrictFP())
+ if (Call->isNoBuiltin())
return false;
switch (F->getIntrinsicID()) {
- case Intrinsic::fabs:
- case Intrinsic::minnum:
- case Intrinsic::maxnum:
- case Intrinsic::minimum:
- case Intrinsic::maximum:
- case Intrinsic::log:
- case Intrinsic::log2:
- case Intrinsic::log10:
- case Intrinsic::exp:
- case Intrinsic::exp2:
- case Intrinsic::floor:
- case Intrinsic::ceil:
- case Intrinsic::sqrt:
- case Intrinsic::sin:
- case Intrinsic::cos:
- case Intrinsic::trunc:
- case Intrinsic::rint:
- case Intrinsic::nearbyint:
- case Intrinsic::pow:
- case Intrinsic::powi:
+ // Operations that do not operate on floating-point numbers and do not
+ // depend on the FP environment can be folded even in strictfp functions.
case Intrinsic::bswap:
case Intrinsic::ctpop:
case Intrinsic::ctlz:
case Intrinsic::cttz:
case Intrinsic::fshl:
case Intrinsic::fshr:
- case Intrinsic::fma:
- case Intrinsic::fmuladd:
- case Intrinsic::copysign:
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
- case Intrinsic::round:
case Intrinsic::masked_load:
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
@@ -1448,9 +1441,49 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::usub_sat:
case Intrinsic::smul_fix:
case Intrinsic::smul_fix_sat:
+ case Intrinsic::bitreverse:
+ case Intrinsic::is_constant:
+ case Intrinsic::experimental_vector_reduce_add:
+ case Intrinsic::experimental_vector_reduce_mul:
+ case Intrinsic::experimental_vector_reduce_and:
+ case Intrinsic::experimental_vector_reduce_or:
+ case Intrinsic::experimental_vector_reduce_xor:
+ case Intrinsic::experimental_vector_reduce_smin:
+ case Intrinsic::experimental_vector_reduce_smax:
+ case Intrinsic::experimental_vector_reduce_umin:
+ case Intrinsic::experimental_vector_reduce_umax:
+ return true;
+
+ // Floating-point operations cannot be folded in strictfp functions in the
+ // general case. They can be folded if the FP environment is known to the
+ // compiler.
+ case Intrinsic::minnum:
+ case Intrinsic::maxnum:
+ case Intrinsic::minimum:
+ case Intrinsic::maximum:
+ case Intrinsic::log:
+ case Intrinsic::log2:
+ case Intrinsic::log10:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::sqrt:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ case Intrinsic::pow:
+ case Intrinsic::powi:
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
case Intrinsic::convert_from_fp16:
case Intrinsic::convert_to_fp16:
- case Intrinsic::bitreverse:
+ case Intrinsic::amdgcn_cos:
+ case Intrinsic::amdgcn_cubeid:
+ case Intrinsic::amdgcn_cubema:
+ case Intrinsic::amdgcn_cubesc:
+ case Intrinsic::amdgcn_cubetc:
+ case Intrinsic::amdgcn_fmul_legacy:
+ case Intrinsic::amdgcn_fract:
+ case Intrinsic::amdgcn_ldexp:
+ case Intrinsic::amdgcn_sin:
+ // The intrinsics below depend on the rounding mode in MXCSR.
case Intrinsic::x86_sse_cvtss2si:
case Intrinsic::x86_sse_cvtss2si64:
case Intrinsic::x86_sse_cvttss2si:
@@ -1475,14 +1508,37 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::x86_avx512_vcvtsd2usi64:
case Intrinsic::x86_avx512_cvttsd2usi:
case Intrinsic::x86_avx512_cvttsd2usi64:
- case Intrinsic::is_constant:
+ return !Call->isStrictFP();
+
+ // Sign operations are actually bitwise operations; they do not raise
+ // exceptions even for SNaNs.
+ case Intrinsic::fabs:
+ case Intrinsic::copysign:
+ // Non-constrained variants of rounding operations imply the default FP
+ // environment, so they can be folded in any case.
+ case Intrinsic::ceil:
+ case Intrinsic::floor:
+ case Intrinsic::round:
+ case Intrinsic::roundeven:
+ case Intrinsic::trunc:
+ case Intrinsic::nearbyint:
+ case Intrinsic::rint:
+ // Constrained intrinsics can be folded if the FP environment is known to
+ // the compiler.
+ case Intrinsic::experimental_constrained_ceil:
+ case Intrinsic::experimental_constrained_floor:
+ case Intrinsic::experimental_constrained_round:
+ case Intrinsic::experimental_constrained_roundeven:
+ case Intrinsic::experimental_constrained_trunc:
+ case Intrinsic::experimental_constrained_nearbyint:
+ case Intrinsic::experimental_constrained_rint:
return true;
default:
return false;
case Intrinsic::not_intrinsic: break;
}
- if (!F->hasName())
+ if (!F->hasName() || Call->isStrictFP())
return false;
// In these cases, the check of the length is required. We don't want to
@@ -1517,7 +1573,8 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case 'p':
return Name == "pow" || Name == "powf";
case 'r':
- return Name == "rint" || Name == "rintf" ||
+ return Name == "remainder" || Name == "remainderf" ||
+ Name == "rint" || Name == "rintf" ||
Name == "round" || Name == "roundf";
case 's':
return Name == "sin" || Name == "sinf" ||
@@ -1616,6 +1673,53 @@ Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), double V,
return GetConstantFoldFPValue(V, Ty);
}
+Constant *ConstantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) {
+ FixedVectorType *VT = dyn_cast<FixedVectorType>(Op->getType());
+ if (!VT)
+ return nullptr;
+ ConstantInt *CI = dyn_cast<ConstantInt>(Op->getAggregateElement(0U));
+ if (!CI)
+ return nullptr;
+ APInt Acc = CI->getValue();
+
+ for (unsigned I = 1; I < VT->getNumElements(); I++) {
+ if (!(CI = dyn_cast<ConstantInt>(Op->getAggregateElement(I))))
+ return nullptr;
+ const APInt &X = CI->getValue();
+ switch (IID) {
+ case Intrinsic::experimental_vector_reduce_add:
+ Acc = Acc + X;
+ break;
+ case Intrinsic::experimental_vector_reduce_mul:
+ Acc = Acc * X;
+ break;
+ case Intrinsic::experimental_vector_reduce_and:
+ Acc = Acc & X;
+ break;
+ case Intrinsic::experimental_vector_reduce_or:
+ Acc = Acc | X;
+ break;
+ case Intrinsic::experimental_vector_reduce_xor:
+ Acc = Acc ^ X;
+ break;
+ case Intrinsic::experimental_vector_reduce_smin:
+ Acc = APIntOps::smin(Acc, X);
+ break;
+ case Intrinsic::experimental_vector_reduce_smax:
+ Acc = APIntOps::smax(Acc, X);
+ break;
+ case Intrinsic::experimental_vector_reduce_umin:
+ Acc = APIntOps::umin(Acc, X);
+ break;
+ case Intrinsic::experimental_vector_reduce_umax:
+ Acc = APIntOps::umax(Acc, X);
+ break;
+ }
+ }
+
+ return ConstantInt::get(Op->getContext(), Acc);
+}
+
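A scalar model of the integer reductions the new helper folds (plain C++ over int32_t rather than APInt; note that smin and umin disagree whenever a lane is negative):

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const int32_t V[4] = {3, -1, 7, 2};
  int32_t Add = 0, Mul = 1, SMin = INT32_MAX;
  uint32_t UMin = UINT32_MAX;
  for (int32_t X : V) {
    Add += X;
    Mul *= X;
    SMin = std::min(SMin, X);
    UMin = std::min(UMin, (uint32_t)X); // lane reinterpreted as unsigned
  }
  assert(Add == 11 && Mul == -42 && SMin == -1);
  assert(UMin == 2); // -1 is 0xFFFFFFFF unsigned, so it never wins umin
  return 0;
}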
/// Attempt to fold an SSE floating point to integer conversion of a constant
/// floating point. If roundTowardZero is false, the default IEEE rounding is
/// used (toward nearest, ties to even). This matches the behavior of the
@@ -1756,6 +1860,11 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
return ConstantFP::get(Ty->getContext(), U);
}
+ if (IntrinsicID == Intrinsic::roundeven) {
+ U.roundToIntegral(APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(Ty->getContext(), U);
+ }
+
if (IntrinsicID == Intrinsic::ceil) {
U.roundToIntegral(APFloat::rmTowardPositive);
return ConstantFP::get(Ty->getContext(), U);
@@ -1776,10 +1885,70 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
return ConstantFP::get(Ty->getContext(), U);
}
+ if (IntrinsicID == Intrinsic::amdgcn_fract) {
+ // The v_fract instruction behaves like the OpenCL spec, which defines
+ // fract(x) as fmin(x - floor(x), 0x1.fffffep-1f): "The min() operator is
+ // there to prevent fract(-small) from returning 1.0. It returns the
+ // largest positive floating-point number less than 1.0."
+ APFloat FloorU(U);
+ FloorU.roundToIntegral(APFloat::rmTowardNegative);
+ APFloat FractU(U - FloorU);
+ APFloat AlmostOne(U.getSemantics(), 1);
+ AlmostOne.next(/*nextDown*/ true);
+ return ConstantFP::get(Ty->getContext(), minimum(FractU, AlmostOne));
+ }
+
+ // Rounding operations (floor, trunc, ceil, round and nearbyint) do not
+ // raise FP exceptions, unless the argument is a signaling NaN.
+
+ Optional<APFloat::roundingMode> RM;
+ switch (IntrinsicID) {
+ default:
+ break;
+ case Intrinsic::experimental_constrained_nearbyint:
+ case Intrinsic::experimental_constrained_rint: {
+ auto CI = cast<ConstrainedFPIntrinsic>(Call);
+ RM = CI->getRoundingMode();
+ if (!RM || RM.getValue() == RoundingMode::Dynamic)
+ return nullptr;
+ break;
+ }
+ case Intrinsic::experimental_constrained_round:
+ RM = APFloat::rmNearestTiesToAway;
+ break;
+ case Intrinsic::experimental_constrained_ceil:
+ RM = APFloat::rmTowardPositive;
+ break;
+ case Intrinsic::experimental_constrained_floor:
+ RM = APFloat::rmTowardNegative;
+ break;
+ case Intrinsic::experimental_constrained_trunc:
+ RM = APFloat::rmTowardZero;
+ break;
+ }
+ if (RM) {
+ auto CI = cast<ConstrainedFPIntrinsic>(Call);
+ if (U.isFinite()) {
+ APFloat::opStatus St = U.roundToIntegral(*RM);
+ if (IntrinsicID == Intrinsic::experimental_constrained_rint &&
+ St == APFloat::opInexact) {
+ Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
+ if (EB && *EB == fp::ebStrict)
+ return nullptr;
+ }
+ } else if (U.isSignaling()) {
+ Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
+ if (EB && *EB != fp::ebIgnore)
+ return nullptr;
+ U = APFloat::getQNaN(U.getSemantics());
+ }
+ return ConstantFP::get(Ty->getContext(), U);
+ }
+
/// We only fold functions with finite arguments. Folding NaN and inf is
/// likely to be aborted with an exception anyway, and some host libms
/// have known errors raising exceptions.
- if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity())
+ if (!U.isFinite())
return nullptr;
/// Currently APFloat versions of these functions do not exist, so we use
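A host-side model of the amdgcn.fract fold added in this hunk (a sketch using <cmath> floats in place of APFloat; the hypothetical fract helper mirrors the OpenCL definition quoted above):

#include <cassert>
#include <cmath>

static float fract(float X) {
  const float AlmostOne = std::nextafter(1.0f, 0.0f); // 0x1.fffffep-1f
  return std::fmin(X - std::floor(X), AlmostOne);
}

int main() {
  assert(fract(1.25f) == 0.25f);
  assert(fract(-1e-9f) < 1.0f); // without the min() this would round up to 1.0
  return 0;
}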
@@ -1809,6 +1978,26 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
return ConstantFoldFP(cos, V, Ty);
case Intrinsic::sqrt:
return ConstantFoldFP(sqrt, V, Ty);
+ case Intrinsic::amdgcn_cos:
+ case Intrinsic::amdgcn_sin:
+ if (V < -256.0 || V > 256.0)
+ // The gfx8 and gfx9 architectures handle arguments outside the range
+ // [-256, 256] differently. This should be a rare case so bail out
+ // rather than trying to handle the difference.
+ return nullptr;
+ bool IsCos = IntrinsicID == Intrinsic::amdgcn_cos;
+ double V4 = V * 4.0;
+ if (V4 == floor(V4)) {
+ // Force exact results for quarter-integer inputs.
+ const double SinVals[4] = { 0.0, 1.0, 0.0, -1.0 };
+ V = SinVals[((int)V4 + (IsCos ? 1 : 0)) & 3];
+ } else {
+ if (IsCos)
+ V = cos(V * 2.0 * numbers::pi);
+ else
+ V = sin(V * 2.0 * numbers::pi);
+ }
+ return GetConstantFoldFPValue(V, Ty);
}
if (!TLI)
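A host model of the quarter-integer special case above: amdgcn.sin/cos take their argument in turns (V stands for 2*pi*V radians), and multiples of a quarter turn are forced to exact table values instead of going through libm. A sketch with a hypothetical gcnSinCos helper:

#include <cassert>
#include <cmath>

static double gcnSinCos(double V, bool IsCos) {
  const double TwoPi = 6.283185307179586;
  double V4 = V * 4.0;
  if (V4 == std::floor(V4)) {            // quarter-integer input
    static const double SinVals[4] = {0.0, 1.0, 0.0, -1.0};
    return SinVals[((int)V4 + (IsCos ? 1 : 0)) & 3];
  }
  return IsCos ? std::cos(V * TwoPi) : std::sin(V * TwoPi);
}

int main() {
  assert(gcnSinCos(0.25, /*IsCos=*/false) == 1.0); // sin(pi/2), exactly
  assert(gcnSinCos(0.50, /*IsCos=*/true) == -1.0); // cos(pi), exactly
  return 0;
}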
@@ -1990,12 +2179,40 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
}
}
+ if (isa<ConstantAggregateZero>(Operands[0])) {
+ switch (IntrinsicID) {
+ default: break;
+ case Intrinsic::experimental_vector_reduce_add:
+ case Intrinsic::experimental_vector_reduce_mul:
+ case Intrinsic::experimental_vector_reduce_and:
+ case Intrinsic::experimental_vector_reduce_or:
+ case Intrinsic::experimental_vector_reduce_xor:
+ case Intrinsic::experimental_vector_reduce_smin:
+ case Intrinsic::experimental_vector_reduce_smax:
+ case Intrinsic::experimental_vector_reduce_umin:
+ case Intrinsic::experimental_vector_reduce_umax:
+ return ConstantInt::get(Ty, 0);
+ }
+ }
+
// Support ConstantVector in case we have an Undef in the top.
if (isa<ConstantVector>(Operands[0]) ||
isa<ConstantDataVector>(Operands[0])) {
auto *Op = cast<Constant>(Operands[0]);
switch (IntrinsicID) {
default: break;
+ case Intrinsic::experimental_vector_reduce_add:
+ case Intrinsic::experimental_vector_reduce_mul:
+ case Intrinsic::experimental_vector_reduce_and:
+ case Intrinsic::experimental_vector_reduce_or:
+ case Intrinsic::experimental_vector_reduce_xor:
+ case Intrinsic::experimental_vector_reduce_smin:
+ case Intrinsic::experimental_vector_reduce_smax:
+ case Intrinsic::experimental_vector_reduce_umin:
+ case Intrinsic::experimental_vector_reduce_umax:
+ if (Constant *C = ConstantFoldVectorReduce(IntrinsicID, Op))
+ return C;
+ break;
case Intrinsic::x86_sse_cvtss2si:
case Intrinsic::x86_sse_cvtss2si64:
case Intrinsic::x86_sse2_cvtsd2si:
@@ -2074,6 +2291,16 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
return ConstantFP::get(Ty->getContext(), maximum(C1, C2));
}
+ if (IntrinsicID == Intrinsic::amdgcn_fmul_legacy) {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ // The legacy behaviour is that multiplying zero by anything, even NaN
+ // or infinity, gives +0.0.
+ if (C1.isZero() || C2.isZero())
+ return ConstantFP::getNullValue(Ty);
+ return ConstantFP::get(Ty->getContext(), C1 * C2);
+ }
+
if (!TLI)
return nullptr;
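The legacy-multiply semantics above, sketched on the host with plain floats (APFloat's isZero matches both signed zeros, as does == 0.0f here; fmulLegacy is a hypothetical stand-in):

#include <cassert>
#include <cmath>

static float fmulLegacy(float A, float B) {
  if (A == 0.0f || B == 0.0f) // matches +0.0 and -0.0
    return 0.0f;              // even 0 * NaN and 0 * inf give +0.0
  return A * B;
}

int main() {
  assert(fmulLegacy(0.0f, NAN) == 0.0f);
  assert(!std::signbit(fmulLegacy(-0.0f, INFINITY))); // result is +0.0
  assert(fmulLegacy(2.0f, 3.0f) == 6.0f);
  return 0;
}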
@@ -2097,6 +2324,14 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
return ConstantFP::get(Ty->getContext(), V);
}
break;
+ case LibFunc_remainder:
+ case LibFunc_remainderf:
+ if (TLI->has(Func)) {
+ APFloat V = Op1->getValueAPF();
+ if (APFloat::opStatus::opOK == V.remainder(Op2->getValueAPF()))
+ return ConstantFP::get(Ty->getContext(), V);
+ }
+ break;
case LibFunc_atan2:
case LibFunc_atan2f:
case LibFunc_atan2_finite:
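For reference, the difference between the newly folded remainder and the existing fmod fold: IEEE remainder rounds the quotient to nearest (ties to even), while fmod truncates it. A quick host-side check:

#include <cassert>
#include <cmath>

int main() {
  assert(std::fmod(5.0, 3.0) == 2.0);       // 5 = 1*3 + 2, quotient truncated
  assert(std::remainder(5.0, 3.0) == -1.0); // 5 = 2*3 - 1, quotient rounded
  return 0;
}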
@@ -2118,6 +2353,16 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
return ConstantFP::get(Ty->getContext(),
APFloat((double)std::pow((double)Op1V,
(int)Op2C->getZExtValue())));
+
+ if (IntrinsicID == Intrinsic::amdgcn_ldexp) {
+ // FIXME: Should flush denorms depending on FP mode, but that's ignored
+ // everywhere else.
+
+ // scalbn is equivalent to ldexp with float radix 2
+ APFloat Result = scalbn(Op1->getValueAPF(), Op2C->getSExtValue(),
+ APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(Ty->getContext(), Result);
+ }
}
return nullptr;
}
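The scalbn/ldexp equivalence the comment above relies on, sketched on the host: with a binary radix both compute x * 2^n, exactly for in-range results:

#include <cassert>
#include <cfloat>
#include <cmath>

int main() {
  static_assert(FLT_RADIX == 2, "binary floating point assumed");
  assert(std::ldexp(1.5, 4) == 24.0);                // 1.5 * 2^4
  assert(std::scalbn(1.5, 4) == std::ldexp(1.5, 4)); // same operation
  return 0;
}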
@@ -2275,6 +2520,61 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
return nullptr;
}
+static APFloat ConstantFoldAMDGCNCubeIntrinsic(Intrinsic::ID IntrinsicID,
+ const APFloat &S0,
+ const APFloat &S1,
+ const APFloat &S2) {
+ unsigned ID;
+ const fltSemantics &Sem = S0.getSemantics();
+ APFloat MA(Sem), SC(Sem), TC(Sem);
+ if (abs(S2) >= abs(S0) && abs(S2) >= abs(S1)) {
+ if (S2.isNegative() && S2.isNonZero() && !S2.isNaN()) {
+ // S2 < 0
+ ID = 5;
+ SC = -S0;
+ } else {
+ ID = 4;
+ SC = S0;
+ }
+ MA = S2;
+ TC = -S1;
+ } else if (abs(S1) >= abs(S0)) {
+ if (S1.isNegative() && S1.isNonZero() && !S1.isNaN()) {
+ // S1 < 0
+ ID = 3;
+ TC = -S2;
+ } else {
+ ID = 2;
+ TC = S2;
+ }
+ MA = S1;
+ SC = S0;
+ } else {
+ if (S0.isNegative() && S0.isNonZero() && !S0.isNaN()) {
+ // S0 < 0
+ ID = 1;
+ SC = S2;
+ } else {
+ ID = 0;
+ SC = -S2;
+ }
+ MA = S0;
+ TC = -S1;
+ }
+ switch (IntrinsicID) {
+ default:
+ llvm_unreachable("unhandled amdgcn cube intrinsic");
+ case Intrinsic::amdgcn_cubeid:
+ return APFloat(Sem, ID);
+ case Intrinsic::amdgcn_cubema:
+ return MA + MA;
+ case Intrinsic::amdgcn_cubesc:
+ return SC;
+ case Intrinsic::amdgcn_cubetc:
+ return TC;
+ }
+}
+
static Constant *ConstantFoldScalarCall3(StringRef Name,
Intrinsic::ID IntrinsicID,
Type *Ty,
@@ -2295,6 +2595,15 @@ static Constant *ConstantFoldScalarCall3(StringRef Name,
APFloat::rmNearestTiesToEven);
return ConstantFP::get(Ty->getContext(), V);
}
+ case Intrinsic::amdgcn_cubeid:
+ case Intrinsic::amdgcn_cubema:
+ case Intrinsic::amdgcn_cubesc:
+ case Intrinsic::amdgcn_cubetc: {
+ APFloat V = ConstantFoldAMDGCNCubeIntrinsic(
+ IntrinsicID, Op1->getValueAPF(), Op2->getValueAPF(),
+ Op3->getValueAPF());
+ return ConstantFP::get(Ty->getContext(), V);
+ }
}
}
}
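A plain-float model of the cube-face selection implemented above (a simplified sketch: it drops the NaN-aware negativity checks and returns all four outputs at once, with the cubema doubling already applied; the cube helper is hypothetical):

#include <cassert>
#include <cmath>

struct CubeResult { float Id, MA, SC, TC; };

static CubeResult cube(float S0, float S1, float S2) {
  CubeResult R;
  if (std::fabs(S2) >= std::fabs(S0) && std::fabs(S2) >= std::fabs(S1)) {
    R.Id = S2 < 0.0f ? 5.0f : 4.0f;      // -Z / +Z face
    R.SC = S2 < 0.0f ? -S0 : S0;
    R.MA = S2; R.TC = -S1;
  } else if (std::fabs(S1) >= std::fabs(S0)) {
    R.Id = S1 < 0.0f ? 3.0f : 2.0f;      // -Y / +Y face
    R.TC = S1 < 0.0f ? -S2 : S2;
    R.MA = S1; R.SC = S0;
  } else {
    R.Id = S0 < 0.0f ? 1.0f : 0.0f;      // -X / +X face
    R.SC = S0 < 0.0f ? S2 : -S2;
    R.MA = S0; R.TC = -S1;
  }
  R.MA += R.MA;                          // cubema returns 2 * major axis
  return R;
}

int main() {
  CubeResult R = cube(0.0f, 0.0f, 1.0f); // looking straight down +Z
  assert(R.Id == 4.0f && R.MA == 2.0f);
  return 0;
}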
@@ -2313,8 +2622,8 @@ static Constant *ConstantFoldScalarCall3(StringRef Name,
// how rounding should be done, and provide their own folding to be
// consistent with rounding. This is the same approach as used by
// DAGTypeLegalizer::ExpandIntRes_MULFIX.
- APInt Lhs = Op1->getValue();
- APInt Rhs = Op2->getValue();
+ const APInt &Lhs = Op1->getValue();
+ const APInt &Rhs = Op2->getValue();
unsigned Scale = Op3->getValue().getZExtValue();
unsigned Width = Lhs.getBitWidth();
assert(Scale < Width && "Illegal scale.");
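A worked model of the fixed-point multiply being folded (a sketch of the plain, unsaturated Q-format case; the real code follows DAGTypeLegalizer::ExpandIntRes_MULFIX for rounding and saturation, and smulFix is a hypothetical helper):

#include <cassert>
#include <cstdint>

static int32_t smulFix(int32_t A, int32_t B, unsigned Scale) {
  int64_t Prod = (int64_t)A * (int64_t)B; // widen to 2*Width bits
  return (int32_t)(Prod >> Scale);        // drop Scale fractional bits
}

int main() {
  // Q16.16: 1.5 * 2.25 == 3.375
  int32_t A = 3 << 15;                      // 1.5  * 2^16
  int32_t B = 9 << 14;                      // 2.25 * 2^16
  assert(smulFix(A, B, 16) == (27 << 13));  // 3.375 * 2^16
  return 0;
}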
@@ -2395,19 +2704,26 @@ static Constant *ConstantFoldVectorCall(StringRef Name,
const DataLayout &DL,
const TargetLibraryInfo *TLI,
const CallBase *Call) {
- SmallVector<Constant *, 4> Result(VTy->getNumElements());
+ // Do not iterate on scalable vectors. The number of elements is unknown at
+ // compile time.
+ if (isa<ScalableVectorType>(VTy))
+ return nullptr;
+
+ auto *FVTy = cast<FixedVectorType>(VTy);
+
+ SmallVector<Constant *, 4> Result(FVTy->getNumElements());
SmallVector<Constant *, 4> Lane(Operands.size());
- Type *Ty = VTy->getElementType();
+ Type *Ty = FVTy->getElementType();
if (IntrinsicID == Intrinsic::masked_load) {
auto *SrcPtr = Operands[0];
auto *Mask = Operands[2];
auto *Passthru = Operands[3];
- Constant *VecData = ConstantFoldLoadFromConstPtr(SrcPtr, VTy, DL);
+ Constant *VecData = ConstantFoldLoadFromConstPtr(SrcPtr, FVTy, DL);
SmallVector<Constant *, 32> NewElements;
- for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) {
+ for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
auto *MaskElt = Mask->getAggregateElement(I);
if (!MaskElt)
break;
@@ -2433,12 +2749,12 @@ static Constant *ConstantFoldVectorCall(StringRef Name,
return nullptr;
}
}
- if (NewElements.size() != VTy->getNumElements())
+ if (NewElements.size() != FVTy->getNumElements())
return nullptr;
return ConstantVector::get(NewElements);
}
- for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) {
+ for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
// Gather a column of constants.
for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
// Some intrinsics use a scalar type for certain arguments.
@@ -2470,7 +2786,7 @@ static Constant *ConstantFoldVectorCall(StringRef Name,
Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
ArrayRef<Constant *> Operands,
const TargetLibraryInfo *TLI) {
- if (Call->isNoBuiltin() || Call->isStrictFP())
+ if (Call->isNoBuiltin())
return nullptr;
if (!F->hasName())
return nullptr;
@@ -2520,11 +2836,9 @@ bool llvm::isMathLibCallNoop(const CallBase *Call,
case LibFunc_expf:
// FIXME: These boundaries are slightly conservative.
if (OpC->getType()->isDoubleTy())
- return Op.compare(APFloat(-745.0)) != APFloat::cmpLessThan &&
- Op.compare(APFloat(709.0)) != APFloat::cmpGreaterThan;
+ return !(Op < APFloat(-745.0) || Op > APFloat(709.0));
if (OpC->getType()->isFloatTy())
- return Op.compare(APFloat(-103.0f)) != APFloat::cmpLessThan &&
- Op.compare(APFloat(88.0f)) != APFloat::cmpGreaterThan;
+ return !(Op < APFloat(-103.0f) || Op > APFloat(88.0f));
break;
case LibFunc_exp2l:
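The exp cushions above can be sanity-checked on a typical IEEE-754 host with a well-rounded libm (a sketch only; exact boundary behavior belongs to the libm, which is why the source calls its own bounds conservative):

#include <cassert>
#include <cmath>

int main() {
  assert(std::isfinite(std::exp(709.0))); // ~8.2e307, below DBL_MAX
  assert(std::isinf(std::exp(710.0)));    // above DBL_MAX, overflows
  assert(std::exp(-745.0) > 0.0);         // tiniest denormal, still nonzero
  assert(std::exp(-746.0) == 0.0);        // underflows to +0
  return 0;
}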
@@ -2532,11 +2846,9 @@ bool llvm::isMathLibCallNoop(const CallBase *Call,
case LibFunc_exp2f:
// FIXME: These boundaries are slightly conservative.
if (OpC->getType()->isDoubleTy())
- return Op.compare(APFloat(-1074.0)) != APFloat::cmpLessThan &&
- Op.compare(APFloat(1023.0)) != APFloat::cmpGreaterThan;
+ return !(Op < APFloat(-1074.0) || Op > APFloat(1023.0));
if (OpC->getType()->isFloatTy())
- return Op.compare(APFloat(-149.0f)) != APFloat::cmpLessThan &&
- Op.compare(APFloat(127.0f)) != APFloat::cmpGreaterThan;
+ return !(Op < APFloat(-149.0f) || Op > APFloat(127.0f));
break;
case LibFunc_sinl:
@@ -2566,10 +2878,8 @@ bool llvm::isMathLibCallNoop(const CallBase *Call,
case LibFunc_acosl:
case LibFunc_acos:
case LibFunc_acosf:
- return Op.compare(APFloat(Op.getSemantics(), "-1")) !=
- APFloat::cmpLessThan &&
- Op.compare(APFloat(Op.getSemantics(), "1")) !=
- APFloat::cmpGreaterThan;
+ return !(Op < APFloat(Op.getSemantics(), "-1") ||
+ Op > APFloat(Op.getSemantics(), "1"));
case LibFunc_sinh:
case LibFunc_cosh:
@@ -2579,11 +2889,9 @@ bool llvm::isMathLibCallNoop(const CallBase *Call,
case LibFunc_coshl:
// FIXME: These boundaries are slightly conservative.
if (OpC->getType()->isDoubleTy())
- return Op.compare(APFloat(-710.0)) != APFloat::cmpLessThan &&
- Op.compare(APFloat(710.0)) != APFloat::cmpGreaterThan;
+ return !(Op < APFloat(-710.0) || Op > APFloat(710.0));
if (OpC->getType()->isFloatTy())
- return Op.compare(APFloat(-89.0f)) != APFloat::cmpLessThan &&
- Op.compare(APFloat(89.0f)) != APFloat::cmpGreaterThan;
+ return !(Op < APFloat(-89.0f) || Op > APFloat(89.0f));
break;
case LibFunc_sqrtl:
@@ -2626,6 +2934,9 @@ bool llvm::isMathLibCallNoop(const CallBase *Call,
case LibFunc_fmodl:
case LibFunc_fmod:
case LibFunc_fmodf:
+ case LibFunc_remainderl:
+ case LibFunc_remainder:
+ case LibFunc_remainderf:
return Op0.isNaN() || Op1.isNaN() ||
(!Op0.isInfinity() && !Op1.isZero());
@@ -2637,3 +2948,5 @@ bool llvm::isMathLibCallNoop(const CallBase *Call,
return false;
}
+
+void TargetFolder::anchor() {}