Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp  894
1 file changed, 623 insertions, 271 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 30d202494320..5be1892a44f6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -161,8 +162,13 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
unsigned SplatBitSize;
bool HasUndefs;
unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
+ // Endianness does not matter here. We are checking for a splat given the
+ // element size of the vector, and if we find such a splat for little endian
+ // layout, then that should be valid also for big endian (as the full vector
+ // size is known to be a multiple of the element size).
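+ // For example, a <4 x i8> splat of 0xAB is the repeating 32-bit pattern
+ // 0xABABABAB under either layout, so one layout query suffices.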
+ const bool IsBigEndian = false;
return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs,
- EltSize) &&
+ EltSize, IsBigEndian) &&
EltSize == SplatBitSize;
}
@@ -344,12 +350,13 @@ bool ISD::isFreezeUndef(const SDNode *N) {
return N->getOpcode() == ISD::FREEZE && N->getOperand(0).isUndef();
}
-bool ISD::matchUnaryPredicate(SDValue Op,
- std::function<bool(ConstantSDNode *)> Match,
- bool AllowUndefs) {
+template <typename ConstNodeType>
+bool ISD::matchUnaryPredicateImpl(SDValue Op,
+ std::function<bool(ConstNodeType *)> Match,
+ bool AllowUndefs) {
// FIXME: Add support for scalar UNDEF cases?
- if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
- return Match(Cst);
+ if (auto *C = dyn_cast<ConstNodeType>(Op))
+ return Match(C);
// FIXME: Add support for vector UNDEF cases?
if (ISD::BUILD_VECTOR != Op.getOpcode() &&
@@ -364,12 +371,17 @@ bool ISD::matchUnaryPredicate(SDValue Op,
continue;
}
- auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
+ auto *Cst = dyn_cast<ConstNodeType>(Op.getOperand(i));
if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
return false;
}
return true;
}
+// Explicitly instantiate the used template types.
+template bool ISD::matchUnaryPredicateImpl<ConstantSDNode>(
+ SDValue, std::function<bool(ConstantSDNode *)>, bool);
+template bool ISD::matchUnaryPredicateImpl<ConstantFPSDNode>(
+ SDValue, std::function<bool(ConstantFPSDNode *)>, bool);
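+// Callers can now match FP constants and splats as well, e.g. (via the FP
+// wrapper that the matching header change is assumed to name
+// ISD::matchUnaryFpPredicate):
+//   ISD::matchUnaryFpPredicate(Op, [](ConstantFPSDNode *C) {
+//     return C->getValueAPF().isExactlyValue(1.0);
+//   });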
bool ISD::matchBinaryPredicate(
SDValue LHS, SDValue RHS,
@@ -951,7 +963,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
/// doNotCSE - Return true if CSE should not be performed for this node.
static bool doNotCSE(SDNode *N) {
if (N->getValueType(0) == MVT::Glue)
- return true; // Never CSE anything that produces a flag.
+ return true; // Never CSE anything that produces a glue result.
switch (N->getOpcode()) {
default: break;
@@ -963,7 +975,7 @@ static bool doNotCSE(SDNode *N) {
// Check that remaining values produced are not flags.
for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
if (N->getValueType(i) == MVT::Glue)
- return true; // Never CSE anything that produces a flag.
+ return true; // Never CSE anything that produces a glue result.
return false;
}
@@ -1197,7 +1209,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
}
#ifndef NDEBUG
// Verify that the node was actually in one of the CSE maps, unless it has a
- // flag result (which cannot be CSE'd) or is one of the special cases that are
+ // glue result (which cannot be CSE'd) or is one of the special cases that are
// not subject to CSE.
if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue &&
!N->isMachineOpcode() && !doNotCSE(N)) {
@@ -1296,17 +1308,16 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
}
Align SelectionDAG::getEVTAlign(EVT VT) const {
- Type *Ty = VT == MVT::iPTR ?
- PointerType::get(Type::getInt8Ty(*getContext()), 0) :
- VT.getTypeForEVT(*getContext());
+ Type *Ty = VT == MVT::iPTR ? PointerType::get(*getContext(), 0)
+ : VT.getTypeForEVT(*getContext());
return getDataLayout().getABITypeAlign(Ty);
}
// EntryNode could meaningfully have debug info if we can find it...
-SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
- : TM(tm), OptLevel(OL),
- EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other, MVT::Glue)),
+SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOptLevel OL)
+ : TM(tm), OptLevel(OL), EntryNode(ISD::EntryToken, 0, DebugLoc(),
+ getVTList(MVT::Other, MVT::Glue)),
Root(getEntryNode()) {
InsertNode(&EntryNode);
DbgInfo = new SDDbgInfo();
@@ -1454,6 +1465,51 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
getNode(ISD::TRUNCATE, DL, VT, Op);
}
+SDValue SelectionDAG::getBitcastedAnyExtOrTrunc(SDValue Op, const SDLoc &DL,
+ EVT VT) {
+ assert(!VT.isVector());
+ auto Type = Op.getValueType();
+ SDValue DestOp;
+ if (Type == VT)
+ return Op;
+ auto Size = Op.getValueSizeInBits();
+ DestOp = getBitcast(MVT::getIntegerVT(Size), Op);
+ if (DestOp.getValueType() == VT)
+ return DestOp;
+
+ return getAnyExtOrTrunc(DestOp, DL, VT);
+}
+
+SDValue SelectionDAG::getBitcastedSExtOrTrunc(SDValue Op, const SDLoc &DL,
+ EVT VT) {
+ assert(!VT.isVector());
+ auto Type = Op.getValueType();
+ SDValue DestOp;
+ if (Type == VT)
+ return Op;
+ auto Size = Op.getValueSizeInBits();
+ DestOp = getBitcast(MVT::getIntegerVT(Size), Op);
+ if (DestOp.getValueType() == VT)
+ return DestOp;
+
+ return getSExtOrTrunc(DestOp, DL, VT);
+}
+
+SDValue SelectionDAG::getBitcastedZExtOrTrunc(SDValue Op, const SDLoc &DL,
+ EVT VT) {
+ assert(!VT.isVector());
+ auto Type = Op.getValueType();
+ SDValue DestOp;
+ if (Type == VT)
+ return Op;
+ auto Size = Op.getValueSizeInBits();
+ DestOp = getBitcast(MVT::getIntegerVT(Size), Op);
+ if (DestOp.getValueType() == VT)
+ return DestOp;
+
+ return getZExtOrTrunc(DestOp, DL, VT);
+}
+
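+// For example, getBitcastedZExtOrTrunc(Op : f16, /*VT=*/i32) bitcasts
+// f16 -> i16 and then zero-extends i16 -> i32; when the bitcast alone
+// already yields VT (e.g. f32 -> i32), no extension node is created.
+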
SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT,
EVT OpVT) {
if (VT.bitsLE(Op.getValueType()))
@@ -1570,7 +1626,11 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) ==
TargetLowering::TypePromoteInteger) {
EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
- APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
+ APInt NewVal;
+ if (TLI->isSExtCheaperThanZExt(VT.getScalarType(), EltVT))
+ NewVal = Elt->getValue().sextOrTrunc(EltVT.getSizeInBits());
+ else
+ NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
Elt = ConstantInt::get(*getContext(), NewVal);
}
// In other cases the element type is illegal and needs to be expanded, for
@@ -1587,7 +1647,8 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits();
// For scalable vectors, try to use a SPLAT_VECTOR_PARTS node.
- if (VT.isScalableVector()) {
+ if (VT.isScalableVector() ||
+ TLI->isOperationLegal(ISD::SPLAT_VECTOR, VT)) {
assert(EltVT.getSizeInBits() % ViaEltSizeInBits == 0 &&
"Can only handle an even split!");
unsigned Parts = EltVT.getSizeInBits() / ViaEltSizeInBits;
@@ -1801,6 +1862,13 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
return SDValue(N, 0);
}
+SDValue SelectionDAG::getJumpTableDebugInfo(int JTI, SDValue Chain,
+ const SDLoc &DL) {
+ EVT PTy = getTargetLoweringInfo().getPointerTy(getDataLayout());
+ return getNode(ISD::JUMP_TABLE_DEBUG_INFO, DL, MVT::Glue, Chain,
+ getTargetConstant(static_cast<uint64_t>(JTI), DL, PTy, true));
+}
+
SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
MaybeAlign Alignment, int Offset,
bool isTarget, unsigned TargetFlags) {
@@ -1855,23 +1923,6 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
return SDValue(N, 0);
}
-SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
- unsigned TargetFlags) {
- FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), std::nullopt);
- ID.AddInteger(Index);
- ID.AddInteger(Offset);
- ID.AddInteger(TargetFlags);
- void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, IP))
- return SDValue(E, 0);
-
- auto *N = newSDNode<TargetIndexSDNode>(Index, VT, Offset, TargetFlags);
- CSEMap.InsertNode(N, IP);
- InsertNode(N);
- return SDValue(N, 0);
-}
-
SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), std::nullopt);
@@ -1950,13 +2001,10 @@ SDValue SelectionDAG::getVScale(const SDLoc &DL, EVT VT, APInt MulImm,
if (ConstantFold) {
const MachineFunction &MF = getMachineFunction();
- auto Attr = MF.getFunction().getFnAttribute(Attribute::VScaleRange);
- if (Attr.isValid()) {
- unsigned VScaleMin = Attr.getVScaleRangeMin();
- if (std::optional<unsigned> VScaleMax = Attr.getVScaleRangeMax())
- if (*VScaleMax == VScaleMin)
- return getConstant(MulImm * VScaleMin, DL, VT);
- }
+ const Function &F = MF.getFunction();
+ ConstantRange CR = getVScaleRange(&F, 64);
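+ // e.g. with a vscale_range(2,2) function attribute the range is the
+ // single value 2, so getVScale(..., MulImm=8) folds to the constant 16.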
+ if (const APInt *C = CR.getSingleElement())
+ return getConstant(MulImm * C->getZExtValue(), DL, VT);
}
return getNode(ISD::VSCALE, DL, VT, getConstant(MulImm, DL, VT));
@@ -2121,11 +2169,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
if (Splat && UndefElements.none()) {
// Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the
// number of elements matches or the value splatted is a zero constant.
- if (SameNumElts)
+ if (SameNumElts || isNullConstant(Splat))
return N1;
- if (auto *C = dyn_cast<ConstantSDNode>(Splat))
- if (C->isZero())
- return N1;
}
// If the shuffle itself creates a splat, build the vector directly.
@@ -2490,7 +2535,7 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
// icmp X, X -> true/false
// icmp X, undef -> true/false because undef could be X.
- if (N1 == N2)
+ if (N1.isUndef() || N2.isUndef() || N1 == N2)
return getBoolConstant(ISD::isTrueWhenEqual(Cond), dl, VT, OpVT);
}
@@ -2836,6 +2881,12 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
}
}
+ // Fallback - this is a splat if all demanded elts are the same constant.
+ if (computeKnownBits(V, DemandedElts, Depth).isConstant()) {
+ UndefElts = ~DemandedElts;
+ return true;
+ }
+
return false;
}
@@ -3057,6 +3108,15 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = computeKnownBits(SrcOp, Depth + 1).trunc(BitWidth);
break;
}
+ case ISD::SPLAT_VECTOR_PARTS: {
+ unsigned ScalarSize = Op.getOperand(0).getScalarValueSizeInBits();
+ assert(ScalarSize * Op.getNumOperands() == BitWidth &&
+ "Expected SPLAT_VECTOR_PARTS scalars to cover element width");
+ for (auto [I, SrcOp] : enumerate(Op->ops())) {
+ Known.insertBits(computeKnownBits(SrcOp, Depth + 1), ScalarSize * I);
+ }
+ break;
+ }
case ISD::BUILD_VECTOR:
assert(!Op.getValueType().isScalableVector());
// Collect the known bits that are shared by every demanded vector element.
@@ -3688,14 +3748,19 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
assert(Op.getResNo() == 0 &&
"We only compute knownbits for the difference here.");
- // TODO: Compute influence of the carry operand.
- if (Opcode == ISD::USUBO_CARRY || Opcode == ISD::SSUBO_CARRY)
- break;
+ // With USUBO_CARRY and SSUBO_CARRY, an incoming borrow bit is also subtracted.
+ KnownBits Borrow(1);
+ if (Opcode == ISD::USUBO_CARRY || Opcode == ISD::SSUBO_CARRY) {
+ Borrow = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+ // Borrow has bit width 1.
+ Borrow = Borrow.trunc(1);
+ } else {
+ Borrow.setAllZero();
+ }
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false,
- Known, Known2);
+ Known = KnownBits::computeForSubBorrow(Known, Known2, Borrow);
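+ // Fully known inputs stay fully known, e.g. LHS=12, RHS=2, Borrow=1
+ // yields 12 - 2 - 1 = 9 with every result bit known.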
break;
}
case ISD::UADDO:
@@ -3720,15 +3785,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (Opcode == ISD::ADDE)
// Can't track carry from glue, set carry to unknown.
Carry.resetAll();
- else if (Opcode == ISD::UADDO_CARRY || Opcode == ISD::SADDO_CARRY)
- // TODO: Compute known bits for the carry operand. Not sure if it is worth
- // the trouble (how often will we find a known carry bit). And I haven't
- // tested this very much yet, but something like this might work:
- // Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
- // Carry = Carry.zextOrTrunc(1, false);
- Carry.resetAll();
- else
+ else if (Opcode == ISD::UADDO_CARRY || Opcode == ISD::SADDO_CARRY) {
+ Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+ // Carry has bit width 1.
+ Carry = Carry.trunc(1);
+ } else {
Carry.setAllZero();
+ }
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -4047,8 +4110,11 @@ SelectionDAG::computeOverflowForSignedSub(SDValue N0, SDValue N1) const {
if (ComputeNumSignBits(N0) > 1 && ComputeNumSignBits(N1) > 1)
return OFK_Never;
- // TODO: Add ConstantRange::signedSubMayOverflow handling.
- return OFK_Sometime;
+ KnownBits N0Known = computeKnownBits(N0);
+ KnownBits N1Known = computeKnownBits(N1);
+ ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, true);
+ ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, true);
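+ // e.g. for i8 where both sign bits are known zero, the ranges are
+ // [0,127] and every difference lies in [-127,127], so
+ // signedSubMayOverflow() reports NeverOverflows -> OFK_Never.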
+ return mapOverflowResult(N0Range.signedSubMayOverflow(N1Range));
}
SelectionDAG::OverflowKind
@@ -4057,7 +4123,53 @@ SelectionDAG::computeOverflowForUnsignedSub(SDValue N0, SDValue N1) const {
if (isNullConstant(N1))
return OFK_Never;
- // TODO: Add ConstantRange::unsignedSubMayOverflow handling.
+ KnownBits N0Known = computeKnownBits(N0);
+ KnownBits N1Known = computeKnownBits(N1);
+ ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false);
+ ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false);
+ return mapOverflowResult(N0Range.unsignedSubMayOverflow(N1Range));
+}
+
+SelectionDAG::OverflowKind
+SelectionDAG::computeOverflowForUnsignedMul(SDValue N0, SDValue N1) const {
+ // X * 0 and X * 1 never overflow.
+ if (isNullConstant(N1) || isOneConstant(N1))
+ return OFK_Never;
+
+ KnownBits N0Known = computeKnownBits(N0);
+ KnownBits N1Known = computeKnownBits(N1);
+ ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false);
+ ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false);
+ return mapOverflowResult(N0Range.unsignedMulMayOverflow(N1Range));
+}
+
+SelectionDAG::OverflowKind
+SelectionDAG::computeOverflowForSignedMul(SDValue N0, SDValue N1) const {
+ // X * 0 and X * 1 never overflow.
+ if (isNullConstant(N1) || isOneConstant(N1))
+ return OFK_Never;
+
+ // Get the size of the result.
+ unsigned BitWidth = N0.getScalarValueSizeInBits();
+
+ // Sum of the sign bits.
+ unsigned SignBits = ComputeNumSignBits(N0) + ComputeNumSignBits(N1);
+
+ // If we have enough sign bits, then there's no overflow.
+ if (SignBits > BitWidth + 1)
+ return OFK_Never;
+
+ if (SignBits == BitWidth + 1) {
+ // The overflow occurs when the true multiplication of the
+ // operands is the minimum negative number.
+ KnownBits N0Known = computeKnownBits(N0);
+ KnownBits N1Known = computeKnownBits(N1);
+ // If one of the operands is non-negative, then there's no
+ // overflow.
+ if (N0Known.isNonNegative() || N1Known.isNonNegative())
+ return OFK_Never;
+ }
+
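+ // e.g. for i8, -16 (4 sign bits) * -8 (5 sign bits) reaches SignBits ==
+ // BitWidth + 1 with both operands negative: the true product 128 wraps
+ // to the minimum negative number, so fall through to OFK_Sometime.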
return OFK_Sometime;
}
@@ -4069,8 +4181,10 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
unsigned BitWidth = OpVT.getScalarSizeInBits();
// Is the constant a known power of 2?
- if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val))
- return Const->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
+ if (ISD::matchUnaryPredicate(Val, [BitWidth](ConstantSDNode *C) {
+ return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
+ }))
+ return true;
// A left-shift of a constant one will have exactly one bit set because
// shifting the bit off the end is undefined.
@@ -4078,6 +4192,8 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
auto *C = isConstOrConstSplat(Val.getOperand(0));
if (C && C->getAPIntValue() == 1)
return true;
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1) &&
+ isKnownNeverZero(Val, Depth);
}
// Similarly, a logical right-shift of a constant sign-bit will have exactly
@@ -4086,8 +4202,13 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
auto *C = isConstOrConstSplat(Val.getOperand(0));
if (C && C->getAPIntValue().isSignMask())
return true;
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1) &&
+ isKnownNeverZero(Val, Depth);
}
+ if (Val.getOpcode() == ISD::ROTL || Val.getOpcode() == ISD::ROTR)
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);
+
// Are all operands of a build vector constant powers of two?
if (Val.getOpcode() == ISD::BUILD_VECTOR)
if (llvm::all_of(Val->ops(), [BitWidth](SDValue E) {
@@ -4109,6 +4230,34 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1))
return true;
+ if (Val.getOpcode() == ISD::SMIN || Val.getOpcode() == ISD::SMAX ||
+ Val.getOpcode() == ISD::UMIN || Val.getOpcode() == ISD::UMAX)
+ return isKnownToBeAPowerOfTwo(Val.getOperand(1), Depth + 1) &&
+ isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);
+
+ if (Val.getOpcode() == ISD::SELECT || Val.getOpcode() == ISD::VSELECT)
+ return isKnownToBeAPowerOfTwo(Val.getOperand(2), Depth + 1) &&
+ isKnownToBeAPowerOfTwo(Val.getOperand(1), Depth + 1);
+
+ if (Val.getOpcode() == ISD::AND) {
+ // Looking for `x & -x` pattern:
+ // If x == 0:
+ // x & -x -> 0
+ // If x != 0:
+ // x & -x -> non-zero pow2
+ // so if we find the pattern, return whether we know `x` is non-zero.
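+ // e.g. for i8 x = 12: -12 is 0xF4 and 12 & 0xF4 == 4, a power of two;
+ // for x == 0 the AND is 0, hence the non-zero requirement.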
+ for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) {
+ SDValue NegOp = Val.getOperand(OpIdx);
+ if (NegOp.getOpcode() == ISD::SUB &&
+ NegOp.getOperand(1) == Val.getOperand(1 - OpIdx) &&
+ isNullOrNullSplat(NegOp.getOperand(0)))
+ return isKnownNeverZero(Val.getOperand(1 - OpIdx), Depth);
+ }
+ }
+
+ if (Val.getOpcode() == ISD::ZERO_EXTEND)
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);
+
// More could be done here, though the above checks are enough
// to handle some common cases.
return false;
@@ -4869,8 +5018,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
- case ISD::AssertSext:
- case ISD::AssertZext:
case ISD::FREEZE:
case ISD::CONCAT_VECTORS:
case ISD::INSERT_SUBVECTOR:
@@ -4886,7 +5033,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::BITREVERSE:
case ISD::PARITY:
case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
case ISD::TRUNCATE:
case ISD::SIGN_EXTEND_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
@@ -4896,6 +5042,10 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::BUILD_PAIR:
return false;
+ // Matches hasPoisonGeneratingFlags().
+ case ISD::ZERO_EXTEND:
+ return ConsiderFlags && Op->getFlags().hasNonNeg();
+
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
@@ -4932,6 +5082,15 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
return true;
}
+bool SelectionDAG::isADDLike(SDValue Op) const {
+ unsigned Opcode = Op.getOpcode();
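+ // A disjoint-bit OR adds with no carries: 0b1100 | 0b0011 ==
+ // 0b1100 + 0b0011. XOR with the minimum signed constant only flips the
+ // sign bit, which matches adding it since the carry out is discarded:
+ // x ^ 0x80 == x + 0x80 (mod 256) for i8.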
+ if (Opcode == ISD::OR)
+ return haveNoCommonBitsSet(Op.getOperand(0), Op.getOperand(1));
+ if (Opcode == ISD::XOR)
+ return isMinSignedConstant(Op.getOperand(1));
+ return false;
+}
+
bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
!isa<ConstantSDNode>(Op.getOperand(1)))
@@ -4977,12 +5136,15 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FCANONICALIZE:
case ISD::FEXP:
case ISD::FEXP2:
+ case ISD::FEXP10:
case ISD::FTRUNC:
case ISD::FFLOOR:
case ISD::FCEIL:
case ISD::FROUND:
case ISD::FROUNDEVEN:
case ISD::FRINT:
+ case ISD::LRINT:
+ case ISD::LLRINT:
case ISD::FNEARBYINT:
case ISD::FLDEXP: {
if (SNaN)
@@ -5112,21 +5274,29 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
isKnownNeverZero(Op.getOperand(2), Depth + 1);
- case ISD::SHL:
+ case ISD::SHL: {
if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap())
return isKnownNeverZero(Op.getOperand(0), Depth + 1);
-
- // 1 << X is never zero. TODO: This can be expanded if we can bound X.
- // The expression is really !Known.One[BitWidth-MaxLog2(Known):0].isZero()
- if (computeKnownBits(Op.getOperand(0), Depth + 1).One[0])
+ KnownBits ValKnown = computeKnownBits(Op.getOperand(0), Depth + 1);
+ // 1 << X is never zero.
+ if (ValKnown.One[0])
+ return true;
+ // If the known one bits survive the maximum shift amount, the result is non-zero.
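+ // e.g. if bit 2 of the value is known one and the shift amount is known
+ // to be at most 4 in i8, even One << 4 keeps a set bit, so every
+ // feasible shift amount leaves the result non-zero.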
+ APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue();
+ if (MaxCnt.ult(ValKnown.getBitWidth()) &&
+ !ValKnown.One.shl(MaxCnt).isZero())
return true;
break;
-
+ }
case ISD::UADDSAT:
case ISD::UMAX:
return isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ // TODO for smin/smax: If either operand is known negative/positive
+ // respectively we don't need the other to be known at all.
+ case ISD::SMAX:
+ case ISD::SMIN:
case ISD::UMIN:
return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
isKnownNeverZero(Op.getOperand(0), Depth + 1);
@@ -5140,16 +5310,19 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
return isKnownNeverZero(Op.getOperand(0), Depth + 1);
case ISD::SRA:
- case ISD::SRL:
+ case ISD::SRL: {
if (Op->getFlags().hasExact())
return isKnownNeverZero(Op.getOperand(0), Depth + 1);
- // Signed >> X is never zero. TODO: This can be expanded if we can bound X.
- // The expression is really
- // !Known.One[SignBit:SignBit-(BitWidth-MaxLog2(Known))].isZero()
- if (computeKnownBits(Op.getOperand(0), Depth + 1).isNegative())
+ KnownBits ValKnown = computeKnownBits(Op.getOperand(0), Depth + 1);
+ if (ValKnown.isNegative())
+ return true;
+ // If the known one bits survive the maximum shift amount, the result is non-zero.
+ APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue();
+ if (MaxCnt.ult(ValKnown.getBitWidth()) &&
+ !ValKnown.One.lshr(MaxCnt).isZero())
return true;
break;
-
+ }
case ISD::UDIV:
case ISD::SDIV:
// div exact can only produce a zero if the dividend is zero.
@@ -5425,161 +5598,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, const SDNodeFlags Flags) {
assert(N1.getOpcode() != ISD::DELETED_NODE && "Operand is DELETED_NODE!");
- // Constant fold unary operations with an integer constant operand. Even
- // opaque constant will be folded, because the folding of unary operations
- // doesn't create new constants with different values. Nevertheless, the
- // opaque flag is preserved during folding to prevent future folding with
- // other constants.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
- const APInt &Val = C->getAPIntValue();
- switch (Opcode) {
- default: break;
- case ISD::SIGN_EXTEND:
- return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
- C->isTargetOpcode(), C->isOpaque());
- case ISD::TRUNCATE:
- if (C->isOpaque())
- break;
- [[fallthrough]];
- case ISD::ZERO_EXTEND:
- return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
- C->isTargetOpcode(), C->isOpaque());
- case ISD::ANY_EXTEND:
- // Some targets like RISCV prefer to sign extend some types.
- if (TLI->isSExtCheaperThanZExt(N1.getValueType(), VT))
- return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
- C->isTargetOpcode(), C->isOpaque());
- return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
- C->isTargetOpcode(), C->isOpaque());
- case ISD::UINT_TO_FP:
- case ISD::SINT_TO_FP: {
- APFloat apf(EVTToAPFloatSemantics(VT),
- APInt::getZero(VT.getSizeInBits()));
- (void)apf.convertFromAPInt(Val,
- Opcode==ISD::SINT_TO_FP,
- APFloat::rmNearestTiesToEven);
- return getConstantFP(apf, DL, VT);
- }
- case ISD::BITCAST:
- if (VT == MVT::f16 && C->getValueType(0) == MVT::i16)
- return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT);
- if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
- return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT);
- if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
- return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT);
- if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
- return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
- break;
- case ISD::ABS:
- return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(),
- C->isOpaque());
- case ISD::BITREVERSE:
- return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(),
- C->isOpaque());
- case ISD::BSWAP:
- return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
- C->isOpaque());
- case ISD::CTPOP:
- return getConstant(Val.popcount(), DL, VT, C->isTargetOpcode(),
- C->isOpaque());
- case ISD::CTLZ:
- case ISD::CTLZ_ZERO_UNDEF:
- return getConstant(Val.countl_zero(), DL, VT, C->isTargetOpcode(),
- C->isOpaque());
- case ISD::CTTZ:
- case ISD::CTTZ_ZERO_UNDEF:
- return getConstant(Val.countr_zero(), DL, VT, C->isTargetOpcode(),
- C->isOpaque());
- case ISD::FP16_TO_FP:
- case ISD::BF16_TO_FP: {
- bool Ignored;
- APFloat FPV(Opcode == ISD::FP16_TO_FP ? APFloat::IEEEhalf()
- : APFloat::BFloat(),
- (Val.getBitWidth() == 16) ? Val : Val.trunc(16));
-
- // This can return overflow, underflow, or inexact; we don't care.
- // FIXME need to be more flexible about rounding mode.
- (void)FPV.convert(EVTToAPFloatSemantics(VT),
- APFloat::rmNearestTiesToEven, &Ignored);
- return getConstantFP(FPV, DL, VT);
- }
- case ISD::STEP_VECTOR: {
- if (SDValue V = FoldSTEP_VECTOR(DL, VT, N1, *this))
- return V;
- break;
- }
- }
- }
-
- // Constant fold unary operations with a floating point constant operand.
- if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N1)) {
- APFloat V = C->getValueAPF(); // make copy
- switch (Opcode) {
- case ISD::FNEG:
- V.changeSign();
- return getConstantFP(V, DL, VT);
- case ISD::FABS:
- V.clearSign();
- return getConstantFP(V, DL, VT);
- case ISD::FCEIL: {
- APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
- if (fs == APFloat::opOK || fs == APFloat::opInexact)
- return getConstantFP(V, DL, VT);
- break;
- }
- case ISD::FTRUNC: {
- APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
- if (fs == APFloat::opOK || fs == APFloat::opInexact)
- return getConstantFP(V, DL, VT);
- break;
- }
- case ISD::FFLOOR: {
- APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
- if (fs == APFloat::opOK || fs == APFloat::opInexact)
- return getConstantFP(V, DL, VT);
- break;
- }
- case ISD::FP_EXTEND: {
- bool ignored;
- // This can return overflow, underflow, or inexact; we don't care.
- // FIXME need to be more flexible about rounding mode.
- (void)V.convert(EVTToAPFloatSemantics(VT),
- APFloat::rmNearestTiesToEven, &ignored);
- return getConstantFP(V, DL, VT);
- }
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT: {
- bool ignored;
- APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT);
- // FIXME need to be more flexible about rounding mode.
- APFloat::opStatus s =
- V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored);
- if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual
- break;
- return getConstant(IntVal, DL, VT);
- }
- case ISD::BITCAST:
- if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
- return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
- if (VT == MVT::i16 && C->getValueType(0) == MVT::bf16)
- return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
- if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
- return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
- if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
- return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
- break;
- case ISD::FP_TO_FP16:
- case ISD::FP_TO_BF16: {
- bool Ignored;
- // This can return overflow, underflow, or inexact; we don't care.
- // FIXME need to be more flexible about rounding mode.
- (void)V.convert(Opcode == ISD::FP_TO_FP16 ? APFloat::IEEEhalf()
- : APFloat::BFloat(),
- APFloat::rmNearestTiesToEven, &Ignored);
- return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
- }
- }
- }
// Constant fold unary operations with a vector integer or float operand.
switch (Opcode) {
@@ -5595,12 +5613,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::FP_TO_FP16:
+ case ISD::FP_TO_BF16:
case ISD::TRUNCATE:
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP:
+ case ISD::FP16_TO_FP:
+ case ISD::BF16_TO_FP:
+ case ISD::BITCAST:
case ISD::ABS:
case ISD::BITREVERSE:
case ISD::BSWAP:
@@ -5608,7 +5631,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- case ISD::CTPOP: {
+ case ISD::CTPOP:
+ case ISD::STEP_VECTOR: {
SDValue Ops = {N1};
if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops))
return Fold;
@@ -5697,6 +5721,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (OpOpcode == ISD::UNDEF)
// zext(undef) = 0, because the top bits will be zero.
return getConstant(0, DL, VT);
+
+ // Skip unnecessary zext_inreg pattern:
+ // (zext (trunc x)) -> x iff the upper bits are known zero.
+ // TODO: Remove (zext (trunc (and x, c))) exception which some targets
+ // use to recognise zext_inreg patterns.
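+ // e.g. if x : i32 has its top 16 bits known zero, then
+ // (zext (trunc x to i16) to i32) is x itself and is returned directly.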
+ if (OpOpcode == ISD::TRUNCATE) {
+ SDValue OpOp = N1.getOperand(0);
+ if (OpOp.getValueType() == VT) {
+ if (OpOp.getOpcode() != ISD::AND) {
+ APInt HiBits = APInt::getBitsSetFrom(VT.getScalarSizeInBits(),
+ N1.getScalarValueSizeInBits());
+ if (MaskedValueIsZero(OpOp, HiBits)) {
+ transferDbgValues(N1, OpOp);
+ return OpOp;
+ }
+ }
+ }
+ }
break;
case ISD::ANY_EXTEND:
assert(VT.isInteger() && N1.getValueType().isInteger() &&
@@ -5853,7 +5895,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDNode *N;
SDVTList VTs = getVTList(VT);
SDValue Ops[] = {N1};
- if (VT != MVT::Glue) { // Don't CSE flag producing nodes
+ if (VT != MVT::Glue) { // Don't CSE glue producing nodes
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
@@ -6040,9 +6082,174 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (isUndef(Opcode, Ops))
return getUNDEF(VT);
+ // Handle unary special cases.
+ if (NumOps == 1) {
+ SDValue N1 = Ops[0];
+
+ // Constant fold unary operations with an integer constant operand. Even
+ // opaque constant will be folded, because the folding of unary operations
+ // doesn't create new constants with different values. Nevertheless, the
+ // opaque flag is preserved during folding to prevent future folding with
+ // other constants.
+ if (auto *C = dyn_cast<ConstantSDNode>(N1)) {
+ const APInt &Val = C->getAPIntValue();
+ switch (Opcode) {
+ case ISD::SIGN_EXTEND:
+ return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
+ case ISD::TRUNCATE:
+ if (C->isOpaque())
+ break;
+ [[fallthrough]];
+ case ISD::ZERO_EXTEND:
+ return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
+ case ISD::ANY_EXTEND:
+ // Some targets like RISC-V prefer to sign extend some types.
+ if (TLI->isSExtCheaperThanZExt(N1.getValueType(), VT))
+ return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
+ return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
+ case ISD::ABS:
+ return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::BITREVERSE:
+ return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::BSWAP:
+ return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::CTPOP:
+ return getConstant(Val.popcount(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ return getConstant(Val.countl_zero(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ return getConstant(Val.countr_zero(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: {
+ APFloat apf(EVTToAPFloatSemantics(VT),
+ APInt::getZero(VT.getSizeInBits()));
+ (void)apf.convertFromAPInt(Val, Opcode == ISD::SINT_TO_FP,
+ APFloat::rmNearestTiesToEven);
+ return getConstantFP(apf, DL, VT);
+ }
+ case ISD::FP16_TO_FP:
+ case ISD::BF16_TO_FP: {
+ bool Ignored;
+ APFloat FPV(Opcode == ISD::FP16_TO_FP ? APFloat::IEEEhalf()
+ : APFloat::BFloat(),
+ (Val.getBitWidth() == 16) ? Val : Val.trunc(16));
+
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)FPV.convert(EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &Ignored);
+ return getConstantFP(FPV, DL, VT);
+ }
+ case ISD::STEP_VECTOR:
+ if (SDValue V = FoldSTEP_VECTOR(DL, VT, N1, *this))
+ return V;
+ break;
+ case ISD::BITCAST:
+ if (VT == MVT::f16 && C->getValueType(0) == MVT::i16)
+ return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT);
+ if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
+ return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT);
+ if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+ return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT);
+ if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
+ return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
+ break;
+ }
+ }
+
+ // Constant fold unary operations with a floating point constant operand.
+ if (auto *C = dyn_cast<ConstantFPSDNode>(N1)) {
+ APFloat V = C->getValueAPF(); // make copy
+ switch (Opcode) {
+ case ISD::FNEG:
+ V.changeSign();
+ return getConstantFP(V, DL, VT);
+ case ISD::FABS:
+ V.clearSign();
+ return getConstantFP(V, DL, VT);
+ case ISD::FCEIL: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, DL, VT);
+ return SDValue();
+ }
+ case ISD::FTRUNC: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, DL, VT);
+ return SDValue();
+ }
+ case ISD::FFLOOR: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, DL, VT);
+ return SDValue();
+ }
+ case ISD::FP_EXTEND: {
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+ &ignored);
+ return getConstantFP(V, DL, VT);
+ }
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: {
+ bool ignored;
+ APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT);
+ // FIXME need to be more flexible about rounding mode.
+ APFloat::opStatus s =
+ V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored);
+ if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual
+ break;
+ return getConstant(IntVal, DL, VT);
+ }
+ case ISD::FP_TO_FP16:
+ case ISD::FP_TO_BF16: {
+ bool Ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(Opcode == ISD::FP_TO_FP16 ? APFloat::IEEEhalf()
+ : APFloat::BFloat(),
+ APFloat::rmNearestTiesToEven, &Ignored);
+ return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
+ }
+ case ISD::BITCAST:
+ if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
+ return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL,
+ VT);
+ if (VT == MVT::i16 && C->getValueType(0) == MVT::bf16)
+ return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL,
+ VT);
+ if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+ return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL,
+ VT);
+ if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+ return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
+ break;
+ }
+ }
+
+ // Early-out if we failed to constant fold a bitcast.
+ if (Opcode == ISD::BITCAST)
+ return SDValue();
+ }
+
// Handle binops special cases.
if (NumOps == 2) {
- if (SDValue CFP = foldConstantFPMath(Opcode, DL, VT, Ops[0], Ops[1]))
+ if (SDValue CFP = foldConstantFPMath(Opcode, DL, VT, Ops))
return CFP;
if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) {
@@ -6235,11 +6442,17 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
}
SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
- EVT VT, SDValue N1, SDValue N2) {
+ EVT VT, ArrayRef<SDValue> Ops) {
+ // TODO: Add support for unary/ternary fp opcodes.
+ if (Ops.size() != 2)
+ return SDValue();
+
// TODO: We don't do any constant folding for strict FP opcodes here, but we
// should. That will require dealing with a potentially non-default
// rounding mode, checking the "opStatus" return value from the APFloat
// math calculations, and possibly other variations.
+ SDValue N1 = Ops[0];
+ SDValue N2 = Ops[1];
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, /*AllowUndefs*/ false);
ConstantFPSDNode *N2CFP = isConstOrConstSplatFP(N2, /*AllowUndefs*/ false);
if (N1CFP && N2CFP) {
@@ -6600,6 +6813,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
return getBuildVector(VT, DL, Ops);
}
+
+ if (N1.getOpcode() == ISD::SPLAT_VECTOR &&
+ isa<ConstantSDNode>(N1.getOperand(0)))
+ return getNode(
+ ISD::SPLAT_VECTOR, DL, VT,
+ SignExtendInReg(N1.getConstantOperandAPInt(0),
+ N1.getOperand(0).getValueType()));
break;
}
case ISD::FP_TO_SINT_SAT:
@@ -6868,7 +7088,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"Operand is DELETED_NODE!");
// Perform various simplifications.
switch (Opcode) {
- case ISD::FMA: {
+ case ISD::FMA:
+ case ISD::FMAD: {
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
assert(N1.getValueType() == VT && N2.getValueType() == VT &&
N3.getValueType() == VT && "FMA types must match!");
@@ -6879,7 +7100,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
APFloat V1 = N1CFP->getValueAPF();
const APFloat &V2 = N2CFP->getValueAPF();
const APFloat &V3 = N3CFP->getValueAPF();
- V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
+ if (Opcode == ISD::FMAD) {
+ V1.multiply(V2, APFloat::rmNearestTiesToEven);
+ V1.add(V3, APFloat::rmNearestTiesToEven);
+ } else
+ V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
return getConstantFP(V1, DL, VT);
}
break;
@@ -7001,7 +7226,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
}
- // Memoize node if it doesn't produce a flag.
+ // Memoize node if it doesn't produce a glue result.
SDNode *N;
SDVTList VTs = getVTList(VT);
SDValue Ops[] = {N1, N2, N3};
@@ -7342,7 +7567,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (Value.getNode()) {
Store = DAG.getStore(
Chain, dl, Value,
- DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo);
OutChains.push_back(Store);
}
@@ -7367,14 +7592,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
Value = DAG.getExtLoad(
ISD::EXTLOAD, dl, NVT, Chain,
- DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl),
+ DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl),
SrcPtrInfo.getWithOffset(SrcOff), VT,
commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, NewAAInfo);
OutLoadChains.push_back(Value.getValue(1));
Store = DAG.getTruncStore(
Chain, dl, Value,
- DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, NewAAInfo);
OutStoreChains.push_back(Store);
}
@@ -7511,7 +7736,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
Value = DAG.getLoad(
VT, dl, Chain,
- DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl),
+ DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl),
SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags, NewAAInfo);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
@@ -7526,7 +7751,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
Store = DAG.getStore(
Chain, dl, LoadValues[i],
- DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo);
OutChains.push_back(Store);
DstOff += VTSize;
@@ -7631,19 +7856,34 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
}
// If this store is smaller than the largest store, see whether we can get
- // the smaller value for free with a truncate.
+ // the smaller value for free with a truncate or extract vector element and
+ // then store.
SDValue Value = MemSetValue;
if (VT.bitsLT(LargestVT)) {
+ unsigned Index;
+ unsigned NElts = LargestVT.getSizeInBits() / VT.getSizeInBits();
+ EVT SVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), NElts);
if (!LargestVT.isVector() && !VT.isVector() &&
TLI.isTruncateFree(LargestVT, VT))
Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
- else
+ else if (LargestVT.isVector() && !VT.isVector() &&
+ TLI.shallExtractConstSplatVectorElementToStore(
+ LargestVT.getTypeForEVT(*DAG.getContext()),
+ VT.getSizeInBits(), Index) &&
+ TLI.isTypeLegal(SVT) &&
+ LargestVT.getSizeInBits() == SVT.getSizeInBits()) {
+ // Targets that can combine store(extractelement VectorTy, Idx) can get
+ // the smaller value for free.
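+ // e.g. a 2-byte tail of a v2i64 splat pattern: bitcast to v8i16 and
+ // store one extracted i16 lane instead of rematerializing the value.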
+ SDValue TailValue = DAG.getNode(ISD::BITCAST, dl, SVT, MemSetValue);
+ Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, TailValue,
+ DAG.getVectorIdxConstant(Index, dl));
+ } else
Value = getMemsetValue(Src, VT, DAG, dl);
}
assert(Value.getValueType() == VT && "Value with wrong type.");
SDValue Store = DAG.getStore(
Chain, dl, Value,
- DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
DstPtrInfo.getWithOffset(DstOff), Alignment,
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone,
NewAAInfo);
@@ -7717,7 +7957,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = Type::getInt8PtrTy(*getContext());
+ Entry.Ty = PointerType::getUnqual(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
@@ -7819,7 +8059,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = Type::getInt8PtrTy(*getContext());
+ Entry.Ty = PointerType::getUnqual(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
@@ -7933,8 +8173,6 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
// FIXME: pass in SDLoc
CLI.setDebugLoc(dl).setChain(Chain);
- ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src);
- const bool SrcIsZero = ConstantSrc && ConstantSrc->isZero();
const char *BzeroName = getTargetLoweringInfo().getLibcallName(RTLIB::BZERO);
// Helper function to create an Entry from Node and Type.
@@ -7946,16 +8184,16 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
};
// If zeroing out and bzero is present, use it.
- if (SrcIsZero && BzeroName) {
+ if (isNullConstant(Src) && BzeroName) {
TargetLowering::ArgListTy Args;
- Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx)));
+ Args.push_back(CreateEntry(Dst, PointerType::getUnqual(Ctx)));
Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx)));
CLI.setLibCallee(
TLI->getLibcallCallingConv(RTLIB::BZERO), Type::getVoidTy(Ctx),
getExternalSymbol(BzeroName, TLI->getPointerTy(DL)), std::move(Args));
} else {
TargetLowering::ArgListTy Args;
- Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx)));
+ Args.push_back(CreateEntry(Dst, PointerType::getUnqual(Ctx)));
Args.push_back(CreateEntry(Src, Src.getValueType().getTypeForEVT(Ctx)));
Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx)));
CLI.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
@@ -8127,7 +8365,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
(int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
"Opcode is not a memory-accessing opcode!");
- // Memoize the node unless it returns a flag.
+ // Memoize the node unless it returns a glue result.
MemIntrinsicSDNode *N;
if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
@@ -9645,6 +9883,27 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
SDValue ZeroOverFlow = getConstant(0, DL, VTList.VTs[1]);
return getNode(ISD::MERGE_VALUES, DL, VTList, {N1, ZeroOverFlow}, Flags);
}
+
+ if (VTList.VTs[0].isVector() &&
+ VTList.VTs[0].getVectorElementType() == MVT::i1 &&
+ VTList.VTs[1].getVectorElementType() == MVT::i1) {
+ SDValue F1 = getFreeze(N1);
+ SDValue F2 = getFreeze(N2);
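+ // In 1-bit arithmetic the sum/difference is x ^ y; an unsigned add
+ // overflows iff both bits are set, and a subtract borrows iff x == 0
+ // and y == 1. The signed i1 cases coincide.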
+ // {vXi1,vXi1} (u/s)addo(vXi1 x, vXi1 y) -> {xor(x,y),and(x,y)}
+ if (Opcode == ISD::UADDO || Opcode == ISD::SADDO)
+ return getNode(ISD::MERGE_VALUES, DL, VTList,
+ {getNode(ISD::XOR, DL, VTList.VTs[0], F1, F2),
+ getNode(ISD::AND, DL, VTList.VTs[1], F1, F2)},
+ Flags);
+ // {vXi1,vXi1} (u/s)subo(vXi1 x, vXi1 y) -> {xor(x,y),and(~x,y)}
+ if (Opcode == ISD::USUBO || Opcode == ISD::SSUBO) {
+ SDValue NotF1 = getNOT(DL, F1, VTList.VTs[0]);
+ return getNode(ISD::MERGE_VALUES, DL, VTList,
+ {getNode(ISD::XOR, DL, VTList.VTs[0], F1, F2),
+ getNode(ISD::AND, DL, VTList.VTs[1], NotF1, F2)},
+ Flags);
+ }
+ }
break;
}
case ISD::SMUL_LOHI:
@@ -9654,6 +9913,28 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
VTList.VTs[0] == Ops[0].getValueType() &&
VTList.VTs[0] == Ops[1].getValueType() &&
"Binary operator types must match!");
+ // Constant fold.
+ ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(Ops[0]);
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ops[1]);
+ if (LHS && RHS) {
+ unsigned Width = VTList.VTs[0].getScalarSizeInBits();
+ unsigned OutWidth = Width * 2;
+ APInt Val = LHS->getAPIntValue();
+ APInt Mul = RHS->getAPIntValue();
+ if (Opcode == ISD::SMUL_LOHI) {
+ Val = Val.sext(OutWidth);
+ Mul = Mul.sext(OutWidth);
+ } else {
+ Val = Val.zext(OutWidth);
+ Mul = Mul.zext(OutWidth);
+ }
+ Val *= Mul;
+
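+ // e.g. i8 SMUL_LOHI(-3, 100): sext to i16 gives -300 == 0xFED4, so
+ // Hi = 0xFE (-2) and Lo = 0xD4.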
+ SDValue Hi =
+ getConstant(Val.extractBits(Width, Width), DL, VTList.VTs[0]);
+ SDValue Lo = getConstant(Val.trunc(Width), DL, VTList.VTs[0]);
+ return getNode(ISD::MERGE_VALUES, DL, VTList, {Lo, Hi}, Flags);
+ }
break;
}
case ISD::FFREXP: {
@@ -9727,7 +10008,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
#endif
}
- // Memoize the node unless it returns a flag.
+ // Memoize the node unless it returns a glue result.
SDNode *N;
if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
@@ -10100,7 +10381,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
/// For IROrder, we keep the smaller of the two
SDNode *SelectionDAG::UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &OLoc) {
DebugLoc NLoc = N->getDebugLoc();
- if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) {
+ if (NLoc && OptLevel == CodeGenOptLevel::None && OLoc.getDebugLoc() != NLoc) {
N->setDebugLoc(DebugLoc());
}
unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder());
@@ -10569,11 +10850,18 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
switch (N.getOpcode()) {
default:
break;
- case ISD::ADD:
+ case ISD::ADD: {
SDValue N0 = N.getOperand(0);
SDValue N1 = N.getOperand(1);
- if (!isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1)) {
- uint64_t Offset = N.getConstantOperandVal(1);
+ if (!isa<ConstantSDNode>(N0)) {
+ bool RHSConstant = isa<ConstantSDNode>(N1);
+ uint64_t Offset;
+ if (RHSConstant)
+ Offset = N.getConstantOperandVal(1);
+ // We are not allowed to turn indirect debug values variadic, so
+ // don't salvage those.
+ if (!RHSConstant && DV->isIndirect())
+ continue;
// Rewrite an ADD constant node into a DIExpression. Since we are
// performing arithmetic to compute the variable's *value* in the
@@ -10582,7 +10870,8 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
auto *DIExpr = DV->getExpression();
auto NewLocOps = DV->copyLocationOps();
bool Changed = false;
- for (size_t i = 0; i < NewLocOps.size(); ++i) {
+ size_t OrigLocOpsSize = NewLocOps.size();
+ for (size_t i = 0; i < OrigLocOpsSize; ++i) {
// We're not given a ResNo to compare against because the whole
// node is going away. We know that any ISD::ADD only has one
// result, so we can assume any node match is using the result.
@@ -10590,19 +10879,37 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
NewLocOps[i].getSDNode() != &N)
continue;
NewLocOps[i] = SDDbgOperand::fromNode(N0.getNode(), N0.getResNo());
- SmallVector<uint64_t, 3> ExprOps;
- DIExpression::appendOffset(ExprOps, Offset);
- DIExpr = DIExpression::appendOpsToArg(DIExpr, ExprOps, i, true);
+ if (RHSConstant) {
+ SmallVector<uint64_t, 3> ExprOps;
+ DIExpression::appendOffset(ExprOps, Offset);
+ DIExpr = DIExpression::appendOpsToArg(DIExpr, ExprOps, i, true);
+ } else {
+ // Convert to a variadic expression (if not already).
+ // convertToVariadicExpression() returns a const pointer, so we use
+ // a temporary const variable here.
+ const auto *TmpDIExpr =
+ DIExpression::convertToVariadicExpression(DIExpr);
+ SmallVector<uint64_t, 3> ExprOps;
+ ExprOps.push_back(dwarf::DW_OP_LLVM_arg);
+ ExprOps.push_back(NewLocOps.size());
+ ExprOps.push_back(dwarf::DW_OP_plus);
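+ // e.g. salvaging (add %a, %b) used at argument i of a dbg.value:
+ // %b becomes a fresh location operand and the expression gains
+ // (DW_OP_LLVM_arg <new index>, DW_OP_plus) at argument i.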
+ SDDbgOperand RHS =
+ SDDbgOperand::fromNode(N1.getNode(), N1.getResNo());
+ NewLocOps.push_back(RHS);
+ DIExpr = DIExpression::appendOpsToArg(TmpDIExpr, ExprOps, i, true);
+ }
Changed = true;
}
(void)Changed;
assert(Changed && "Salvage target doesn't use N");
+ bool IsVariadic =
+ DV->isVariadic() || OrigLocOpsSize != NewLocOps.size();
+
auto AdditionalDependencies = DV->getAdditionalDependencies();
- SDDbgValue *Clone = getDbgValueList(DV->getVariable(), DIExpr,
- NewLocOps, AdditionalDependencies,
- DV->isIndirect(), DV->getDebugLoc(),
- DV->getOrder(), DV->isVariadic());
+ SDDbgValue *Clone = getDbgValueList(
+ DV->getVariable(), DIExpr, NewLocOps, AdditionalDependencies,
+ DV->isIndirect(), DV->getDebugLoc(), DV->getOrder(), IsVariadic);
ClonedDVs.push_back(Clone);
DV->setIsInvalidated();
DV->setIsEmitted();
@@ -10610,6 +10917,41 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
N0.getNode()->dumprFull(this);
dbgs() << " into " << *DIExpr << '\n');
}
+ break;
+ }
+ case ISD::TRUNCATE: {
+ SDValue N0 = N.getOperand(0);
+ TypeSize FromSize = N0.getValueSizeInBits();
+ TypeSize ToSize = N.getValueSizeInBits(0);
+
+ DIExpression *DbgExpression = DV->getExpression();
+ auto ExtOps = DIExpression::getExtOps(FromSize, ToSize, false);
+ auto NewLocOps = DV->copyLocationOps();
+ bool Changed = false;
+ for (size_t i = 0; i < NewLocOps.size(); ++i) {
+ if (NewLocOps[i].getKind() != SDDbgOperand::SDNODE ||
+ NewLocOps[i].getSDNode() != &N)
+ continue;
+
+ NewLocOps[i] = SDDbgOperand::fromNode(N0.getNode(), N0.getResNo());
+ DbgExpression = DIExpression::appendOpsToArg(DbgExpression, ExtOps, i);
+ Changed = true;
+ }
+ assert(Changed && "Salvage target doesn't use N");
+ (void)Changed;
+
+ SDDbgValue *Clone =
+ getDbgValueList(DV->getVariable(), DbgExpression, NewLocOps,
+ DV->getAdditionalDependencies(), DV->isIndirect(),
+ DV->getDebugLoc(), DV->getOrder(), DV->isVariadic());
+
+ ClonedDVs.push_back(Clone);
+ DV->setIsInvalidated();
+ DV->setIsEmitted();
+ LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this);
+ dbgs() << " into " << *DbgExpression << '\n');
+ break;
+ }
}
}
@@ -12113,6 +12455,10 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
// FIXME: This does not work for vectors with elements less than 8 bits.
while (VecWidth > 8) {
+ // If we can't split in half, stop here.
+ if (VecWidth & 1)
+ break;
+
unsigned HalfSize = VecWidth / 2;
APInt HighValue = SplatValue.extractBits(HalfSize, HalfSize);
APInt LowValue = SplatValue.extractBits(HalfSize, 0);
@@ -12130,6 +12476,12 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
VecWidth = HalfSize;
}
+ // FIXME: The loop above only tries to split in halves. If the input vector
+ // is for example <3 x i16>, it would not be able to detect a SplatBitSize
+ // of 16. It is unclear whether that is a design flaw currently limiting
+ // optimizations; presumably vectors were normally power-of-2 sized back
+ // when this helper was created.
+
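+ // e.g. a <4 x i8> BUILD_VECTOR of four 0xAA elements enters with
+ // SplatValue 0xAAAAAAAA and VecWidth 32; the halves 0xAAAA match, then
+ // 0xAA matches, and the loop stops at width 8 with SplatBitSize = 8.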
SplatBitSize = VecWidth;
return true;
}