Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 946
1 file changed, 636 insertions(+), 310 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index dc245f0d7b16..ce400ea43f29 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -129,12 +129,12 @@ static cl::opt<unsigned> StoreMergeDependenceLimit(
static cl::opt<bool> EnableReduceLoadOpStoreWidth(
"combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
- cl::desc("DAG cominber enable reducing the width of load/op/store "
+ cl::desc("DAG combiner enable reducing the width of load/op/store "
"sequence"));
static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
"combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
- cl::desc("DAG cominber enable load/<replace bytes>/store with "
+ cl::desc("DAG combiner enable load/<replace bytes>/store with "
"a narrower store"));
namespace {
@@ -319,7 +319,7 @@ namespace {
/// If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
unsigned BitWidth = Op.getScalarValueSizeInBits();
- APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
+ APInt DemandedBits = APInt::getAllOnes(BitWidth);
return SimplifyDemandedBits(Op, DemandedBits);
}
@@ -345,7 +345,7 @@ namespace {
return false;
unsigned NumElts = Op.getValueType().getVectorNumElements();
- APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ APInt DemandedElts = APInt::getAllOnes(NumElts);
return SimplifyDemandedVectorElts(Op, DemandedElts);
}
@@ -436,7 +436,7 @@ namespace {
SDValue visitOR(SDNode *N);
SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitXOR(SDNode *N);
- SDValue SimplifyVBinOp(SDNode *N);
+ SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
@@ -515,6 +515,7 @@ namespace {
SDValue visitFP_TO_FP16(SDNode *N);
SDValue visitFP16_TO_FP(SDNode *N);
SDValue visitVECREDUCE(SDNode *N);
+ SDValue visitVPOp(SDNode *N);
SDValue visitFADDForFMACombine(SDNode *N);
SDValue visitFSUBForFMACombine(SDNode *N);
@@ -615,7 +616,7 @@ namespace {
SmallVectorImpl<SDValue> &Aliases);
/// Return true if there is any possibility that the two addresses overlap.
- bool isAlias(SDNode *Op0, SDNode *Op1) const;
+ bool mayAlias(SDNode *Op0, SDNode *Op1) const;
/// Walk up chain skipping non-aliasing memory nodes, looking for a better
/// chain (aliasing node).
@@ -1062,21 +1063,22 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
if (N0.getOpcode() != Opc)
return SDValue();
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
- if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {
// Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
- if (SDValue OpNode =
- DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
- return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
+ return DAG.getNode(Opc, DL, VT, N00, OpNode);
return SDValue();
}
if (N0.hasOneUse()) {
// Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
// iff (op x, c1) has one use
- SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
- if (!OpNode.getNode())
- return SDValue();
- return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
+ if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1))
+ return DAG.getNode(Opc, DL, VT, OpNode, N01);
+ return SDValue();
}
}
return SDValue();
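The reassociation above is a pure algebraic rewrite; for a wrapping integer op such as add it holds for all inputs. A stand-alone scalar sketch (not DAGCombiner code, names are made up):

  #include <cstdint>

  // (op (op x, c1), c2) -> (op x, (op c1, c2)): the two constants fold first.
  constexpr uint32_t reassocAdd(uint32_t X, uint32_t C1, uint32_t C2) {
    return X + (C1 + C2);
  }
  // Unsigned wraparound keeps both forms equal even on overflow.
  static_assert(reassocAdd(0xFFFFFFFFu, 7u, 9u) == (0xFFFFFFFFu + 7u) + 9u);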
@@ -1738,6 +1740,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
+#include "llvm/IR/VPIntrinsics.def"
+ return visitVPOp(N);
}
return SDValue();
}
@@ -2257,7 +2262,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (add x, 0) -> x, vector edition
@@ -2781,7 +2786,7 @@ static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
IsFlip = Const->isOne();
break;
case TargetLowering::ZeroOrNegativeOneBooleanContent:
- IsFlip = Const->isAllOnesValue();
+ IsFlip = Const->isAllOnes();
break;
case TargetLowering::UndefinedBooleanContent:
IsFlip = (Const->getAPIntValue() & 0x01) == 1;
@@ -3257,7 +3262,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (sub x, 0) -> x, vector edition
@@ -3315,11 +3320,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
// Convert 0 - abs(x).
- SDValue Result;
if (N1->getOpcode() == ISD::ABS &&
- !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
- TLI.expandABS(N1.getNode(), Result, DAG, true))
- return Result;
+ !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
+ if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
+ return Result;
// Fold neg(splat(neg(x))) -> splat(x)
if (VT.isVector()) {
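For reference, expandABS in negated form typically produces the classic shift/xor sequence; with S = X s>> 31, abs(X) = (X ^ S) - S, so 0 - abs(X) becomes S - (X ^ S). A scalar sketch (illustrative only; the exact expansion is target-dependent):

  #include <cstdint>

  constexpr int32_t negAbs(int32_t X) {
    int32_t S = X >> 31;   // arithmetic shift assumed: 0 or -1
    return S - (X ^ S);    // == 0 - abs(X), including for INT32_MIN
  }
  static_assert(negAbs(5) == -5 && negAbs(-7) == -7 && negAbs(0) == 0);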
@@ -3783,7 +3787,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
@@ -3808,18 +3812,18 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
// fold (mul x, 0) -> 0
- if (N1IsConst && ConstValue1.isNullValue())
+ if (N1IsConst && ConstValue1.isZero())
return N1;
// fold (mul x, 1) -> x
- if (N1IsConst && ConstValue1.isOneValue())
+ if (N1IsConst && ConstValue1.isOne())
return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// fold (mul x, -1) -> 0-x
- if (N1IsConst && ConstValue1.isAllOnesValue()) {
+ if (N1IsConst && ConstValue1.isAllOnes()) {
SDLoc DL(N);
return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), N0);
@@ -3837,7 +3841,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
- if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
+ if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
unsigned Log2Val = (-ConstValue1).logBase2();
SDLoc DL(N);
// FIXME: If the input is something that is easily negated (e.g. a
@@ -3966,7 +3970,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
SmallBitVector ClearMask;
ClearMask.reserve(NumElts);
auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
- if (!V || V->isNullValue()) {
+ if (!V || V->isZero()) {
ClearMask.push_back(true);
return true;
}
@@ -4052,9 +4056,7 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
SDValue combined;
- for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
- UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (SDNode *User : Op0.getNode()->uses()) {
if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
User->use_empty())
continue;
@@ -4111,7 +4113,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
// 0 / X -> 0
// 0 % X -> 0
ConstantSDNode *N0C = isConstOrConstSplat(N0);
- if (N0C && N0C->isNullValue())
+ if (N0C && N0C->isZero())
return N0;
// X / X -> 1
@@ -4136,21 +4138,20 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT CCVT = getSetCCResultType(VT);
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
- SDLoc DL(N);
-
// fold (sdiv c1, c2) -> c1/c2
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
return C;
// fold (sdiv X, -1) -> 0-X
- if (N1C && N1C->isAllOnesValue())
+ if (N1C && N1C->isAllOnes())
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
// fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
@@ -4204,11 +4205,11 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// Helper for determining whether a value is a power-2 constant scalar or a
// vector of such elements.
auto IsPowerOfTwo = [](ConstantSDNode *C) {
- if (C->isNullValue() || C->isOpaque())
+ if (C->isZero() || C->isOpaque())
return false;
if (C->getAPIntValue().isPowerOf2())
return true;
- if ((-C->getAPIntValue()).isPowerOf2())
+ if (C->getAPIntValue().isNegatedPowerOf2())
return true;
return false;
};
@@ -4281,21 +4282,20 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT CCVT = getSetCCResultType(VT);
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
- SDLoc DL(N);
-
// fold (udiv c1, c2) -> c1/c2
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
return C;
// fold (udiv X, -1) -> select(X == -1, 1, 0)
- if (N1C && N1C->getAPIntValue().isAllOnesValue())
+ if (N1C && N1C->isAllOnes())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(1, DL, VT),
DAG.getConstant(0, DL, VT));
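The (udiv X, -1) fold works because UINT_MAX is the only unsigned value whose quotient by UINT_MAX is nonzero. A compile-time scalar check (sketch, not part of the patch):

  #include <cstdint>

  constexpr uint32_t udivAllOnes(uint32_t X) {
    return X == 0xFFFFFFFFu ? 1u : 0u;  // select(X == -1, 1, 0)
  }
  static_assert(udivAllOnes(0xFFFFFFFFu) == 0xFFFFFFFFu / 0xFFFFFFFFu);
  static_assert(udivAllOnes(123u) == 123u / 0xFFFFFFFFu);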
@@ -4391,7 +4391,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
return C;
// fold (urem X, -1) -> select(X == -1, 0, x)
- if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
+ if (!isSigned && N1C && N1C->isAllOnes())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(0, DL, VT), N0);
@@ -4475,6 +4475,11 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
return C;
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
+
// fold (mulhs x, 0) -> 0
if (isNullConstant(N1))
return N1;
@@ -4527,6 +4532,11 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
return C;
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
+
// fold (mulhu x, 0) -> 0
if (isNullConstant(N1))
return N1;
@@ -4567,6 +4577,12 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
}
}
+ // Simplify the operands using demanded-bits information.
+ // We don't have demanded bits support for MULHU so this just enables constant
+ // folding based on known bits.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -4768,20 +4784,21 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
unsigned Opcode = N->getOpcode();
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold operation with constant operands.
- if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
return C;
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
+ return DAG.getNode(N->getOpcode(), DL, VT, N1, N0);
// If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
// Only do this if the current op isn't legal and the flipped is.
@@ -4797,7 +4814,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
default: llvm_unreachable("Unknown MINMAX opcode");
}
if (TLI.isOperationLegal(AltOpcode, VT))
- return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(AltOpcode, DL, VT, N0, N1);
}
// Simplify the operands using demanded-bits information.
@@ -5607,6 +5624,39 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
return DAG.getZExtOrTrunc(Setcc, DL, VT);
}
+/// For targets that support usubsat, match a bit-hack form of that operation
+/// that ends in 'and' and convert it.
+static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N1.getValueType();
+
+ // Canonicalize SRA as operand 1.
+ if (N0.getOpcode() == ISD::SRA)
+ std::swap(N0, N1);
+
+ // xor/add with SMIN (signmask) are logically equivalent.
+ if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() ||
+ N0.getOperand(0) != N1.getOperand(0))
+ return SDValue();
+
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true);
+ ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);
+ if (!XorC || !XorC->getAPIntValue().isSignMask() ||
+ !SraC || SraC->getAPIntValue() != BitWidth - 1)
+ return SDValue();
+
+ // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
+ // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
+ SDLoc DL(N);
+ SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
+}
+
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5618,17 +5668,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
// fold (and x, 0) -> 0, vector edition
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
// do not return N0, because undef node may exist in N0
- return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
+ return DAG.getConstant(APInt::getZero(N0.getScalarValueSizeInBits()),
SDLoc(N), N0.getValueType());
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
// do not return N1, because undef node may exist in N1
- return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
+ return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
SDLoc(N), N1.getValueType());
// fold (and x, -1) -> x, vector edition
@@ -5679,8 +5729,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// if (and x, c) is known to be zero, return 0
unsigned BitWidth = VT.getScalarSizeInBits();
- if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
- APInt::getAllOnesValue(BitWidth)))
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
return DAG.getConstant(0, SDLoc(N), VT);
if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -5742,7 +5791,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Get the constant (if applicable) the zero'th operand is being ANDed with.
// This can be a pure constant or a vector splat, in which case we treat the
// vector as a scalar and use the splat value.
- APInt Constant = APInt::getNullValue(1);
+ APInt Constant = APInt::getZero(1);
if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
Constant = C->getAPIntValue();
} else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
@@ -5773,7 +5822,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
// multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
if ((SplatBitSize % EltBitWidth) == 0) {
- Constant = APInt::getAllOnesValue(EltBitWidth);
+ Constant = APInt::getAllOnes(EltBitWidth);
for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
}
@@ -5800,7 +5849,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
case ISD::NON_EXTLOAD: B = true; break;
}
- if (B && Constant.isAllOnesValue()) {
+ if (B && Constant.isAllOnes()) {
// If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
// preserve semantics once we get rid of the AND.
SDValue NewLoad(Load, 0);
@@ -5970,6 +6019,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (IsAndZeroExtMask(N0, N1))
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
+ if (hasOperation(ISD::USUBSAT, VT))
+ if (SDValue V = foldAndToUsubsat(N, DAG))
+ return V;
+
return SDValue();
}
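A scalar sanity check of the bit-hack matched by the new foldAndToUsubsat (illustrative i8 version, not part of the patch): with the sign mask 0x80, the xor form and the add form both compute usubsat(X, 0x80).

  #include <cstdint>

  constexpr uint8_t viaXor(uint8_t X) {
    int8_t Sra = (int8_t)X >> 7;                 // 0x00 or 0xFF (arithmetic shift)
    return (uint8_t)((X ^ 0x80) & (uint8_t)Sra); // (X ^ 128) & (X s>> 7)
  }
  constexpr uint8_t usubsat80(uint8_t X) { return X >= 0x80 ? X - 0x80 : 0; }
  static_assert(viaXor(0x7F) == usubsat80(0x7F)); // 0
  static_assert(viaXor(0x80) == usubsat80(0x80)); // 0
  static_assert(viaXor(0xFF) == usubsat80(0xFF)); // 0x7F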
@@ -6384,7 +6437,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
// fold (or x, 0) -> x, vector edition
@@ -6925,17 +6978,16 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
// with different shifted sources.
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
- // Must be a legal type. Expanded 'n promoted things won't work with rotates.
EVT VT = LHS.getValueType();
- if (!TLI.isTypeLegal(VT))
- return SDValue();
// The target must have at least one rotate/funnel flavor.
+ // We still try to match rotate by constant pre-legalization.
+ // TODO: Support pre-legalization funnel-shift by constant.
bool HasROTL = hasOperation(ISD::ROTL, VT);
bool HasROTR = hasOperation(ISD::ROTR, VT);
bool HasFSHL = hasOperation(ISD::FSHL, VT);
bool HasFSHR = hasOperation(ISD::FSHR, VT);
- if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
+ if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
return SDValue();
// Check for truncated rotate.
@@ -6988,6 +7040,7 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
if (LHSShift.getOpcode() == RHSShift.getOpcode())
return SDValue(); // Shifts must disagree.
+ // TODO: Support pre-legalization funnel-shift by constant.
bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
if (!IsRotate && !(HasFSHL || HasFSHR))
return SDValue(); // Requires funnel shift support.
@@ -7016,12 +7069,15 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
};
if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
SDValue Res;
- if (IsRotate && (HasROTL || HasROTR))
- Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
- HasROTL ? LHSShiftAmt : RHSShiftAmt);
- else
- Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
- RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
+ if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
+ bool UseROTL = !LegalOperations || HasROTL;
+ Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
+ UseROTL ? LHSShiftAmt : RHSShiftAmt);
+ } else {
+ bool UseFSHL = !LegalOperations || HasFSHL;
+ Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
+ RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
+ }
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
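The rotate-by-constant case now allowed pre-legalization is the usual pair-of-shifts identity (scalar sketch; the constant must be strictly between 0 and the bit width so neither shift is out of range):

  #include <cstdint>

  constexpr uint32_t rotl32(uint32_t X, unsigned C) { // 0 < C < 32
    return (X << C) | (X >> (32 - C));
  }
  static_assert(rotl32(0x80000001u, 1) == 0x00000003u);
  static_assert(rotl32(0x12345678u, 8) == 0x34567812u);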
@@ -7045,6 +7101,11 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
return Res;
}
+ // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
+ // shift.
+ if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
+ return SDValue();
+
// If there is a mask here, and we have a variable shift, we can't be sure
// that we're masking out the right stuff.
if (LHSMask.getNode() || RHSMask.getNode())
@@ -7296,7 +7357,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
// TODO: If there is evidence that running this later would help, this
// limitation could be removed. Legality checks may need to be added
// for the created store and optional bswap/rotate.
- if (LegalOperations)
+ if (LegalOperations || OptLevel == CodeGenOpt::None)
return SDValue();
// We only handle merging simple stores of 1-4 bytes.
@@ -7671,9 +7732,12 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// | D |
// Into:
// (x & m) | (y & ~m)
-// If y is a constant, and the 'andn' does not work with immediates,
-// we unfold into a different pattern:
+// If y is a constant, m is not a 'not', and the 'andn' does not work with
+// immediates, we unfold into a different pattern:
// ~(~x & m) & (m | y)
+// If x is a constant, m is a 'not', and the 'andn' does not work with
+// immediates, we unfold into a different pattern:
+// (x | ~m) & ~(~m & ~y)
// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
// the very least that breaks andnpd / andnps patterns, and because those
// patterns are simplified in IR and shouldn't be created in the DAG
@@ -7728,8 +7792,9 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
SDLoc DL(N);
- // If Y is a constant, check that 'andn' works with immediates.
- if (!TLI.hasAndNot(Y)) {
+ // If Y is a constant, check that 'andn' works with immediates, unless M is
+ // a bitwise not that would already allow ANDN to be used.
+ if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
// If not, we need to do a bit more work to make sure andn is still used.
SDValue NotX = DAG.getNOT(DL, X, VT);
@@ -7739,6 +7804,19 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
}
+ // If X is a constant and M is a bitwise not, check that 'andn' works with
+ // immediates.
+ if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
+ assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
+ // If not, we need to do a bit more work to make sure andn is still used.
+ SDValue NotM = M.getOperand(0);
+ SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
+ SDValue NotY = DAG.getNOT(DL, Y, VT);
+ SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
+ SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
+ return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
+ }
+
SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
SDValue NotM = DAG.getNOT(DL, M, VT);
SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
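Both unfold patterns are plain bitwise identities; checking them per-bit (M selects X where 1, Y where 0) is mechanical. A compile-time sketch, not DAGCombiner code:

  #include <cstdint>

  constexpr uint32_t merge(uint32_t X, uint32_t Y, uint32_t M) {
    return (X & M) | (Y & ~M);
  }
  // Unfold used when Y is constant and 'andn' rejects immediates:
  constexpr uint32_t unfoldY(uint32_t X, uint32_t Y, uint32_t M) {
    return ~(~X & M) & (M | Y);
  }
  // Unfold used when X is constant and M is itself a 'not' (M == ~NotM):
  constexpr uint32_t unfoldX(uint32_t X, uint32_t Y, uint32_t NotM) {
    return (X | NotM) & ~(NotM & ~Y);
  }
  static_assert(unfoldY(0xDEAD, 0xBEEF, 0xF0F0) == merge(0xDEAD, 0xBEEF, 0xF0F0));
  static_assert(unfoldX(0xDEAD, 0xBEEF, ~0xF0F0u) == merge(0xDEAD, 0xBEEF, 0xF0F0));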
@@ -7750,10 +7828,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (xor x, 0) -> x, vector edition
@@ -7764,7 +7843,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
- SDLoc DL(N);
if (N0.isUndef() && N1.isUndef())
return DAG.getConstant(0, DL, VT);
@@ -7899,7 +7977,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// shift has been simplified to undef.
uint64_t ShiftAmt = ShiftC->getLimitedValue();
if (ShiftAmt < BitWidth) {
- APInt Ones = APInt::getAllOnesValue(BitWidth);
+ APInt Ones = APInt::getAllOnes(BitWidth);
Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
if (XorC->getAPIntValue() == Ones) {
// If the xor constant is a shifted -1, do a 'not' before the shift:
@@ -8222,7 +8300,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
@@ -8255,8 +8333,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return NewSel;
// if (shl x, c) is known to be zero, return 0
- if (DAG.MaskedValueIsZero(SDValue(N, 0),
- APInt::getAllOnesValue(OpSizeInBits)))
+ if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
return DAG.getConstant(0, SDLoc(N), VT);
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
@@ -8501,28 +8578,43 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
// Both operands must be equivalent extend nodes.
SDValue LeftOp = ShiftOperand.getOperand(0);
SDValue RightOp = ShiftOperand.getOperand(1);
+
bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
- if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
+ if (!IsSignExt && !IsZeroExt)
return SDValue();
- EVT WideVT1 = LeftOp.getValueType();
- EVT WideVT2 = RightOp.getValueType();
- (void)WideVT2;
+ EVT NarrowVT = LeftOp.getOperand(0).getValueType();
+ unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
+
+ SDValue MulhRightOp;
+ if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
+ unsigned ActiveBits = IsSignExt
+ ? Constant->getAPIntValue().getMinSignedBits()
+ : Constant->getAPIntValue().getActiveBits();
+ if (ActiveBits > NarrowVTSize)
+ return SDValue();
+ MulhRightOp = DAG.getConstant(
+ Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
+ NarrowVT);
+ } else {
+ if (LeftOp.getOpcode() != RightOp.getOpcode())
+ return SDValue();
+ // Check that the two extend nodes are the same type.
+ if (NarrowVT != RightOp.getOperand(0).getValueType())
+ return SDValue();
+ MulhRightOp = RightOp.getOperand(0);
+ }
+
+ EVT WideVT = LeftOp.getValueType();
// Proceed with the transformation if the wide types match.
- assert((WideVT1 == WideVT2) &&
+ assert((WideVT == RightOp.getValueType()) &&
"Cannot have a multiply node with two different operand types.");
- EVT NarrowVT = LeftOp.getOperand(0).getValueType();
- // Check that the two extend nodes are the same type.
- if (NarrowVT != RightOp.getOperand(0).getValueType())
- return SDValue();
-
// Proceed with the transformation if the wide type is twice as large
// as the narrow type.
- unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
- if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
+ if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
return SDValue();
// Check the shift amount with the narrow type size.
@@ -8540,10 +8632,10 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
return SDValue();
- SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
- RightOp.getOperand(0));
- return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
- : DAG.getZExtOrTrunc(Result, DL, WideVT1));
+ SDValue Result =
+ DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
+ return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT)
+ : DAG.getZExtOrTrunc(Result, DL, WideVT));
}
SDValue DAGCombiner::visitSRA(SDNode *N) {
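combineShiftToMULH matches a widening multiply whose high half is extracted by the shift; the change above additionally accepts a constant RHS whose value fits in the narrow type, truncating it instead of requiring a matching extend node. The scalar shape of the pattern (illustration only):

  #include <cstdint>

  // (srl (mul (zext A), (zext B)), 32) --> mulhu A, B
  constexpr uint32_t mulhu32(uint32_t A, uint32_t B) {
    return (uint32_t)(((uint64_t)A * B) >> 32);
  }
  static_assert(mulhu32(0xFFFFFFFFu, 0xFFFFFFFFu) == 0xFFFFFFFEu);
  static_assert(mulhu32(0x80000000u, 2u) == 1u);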
@@ -8563,7 +8655,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
ConstantSDNode *N1C = isConstOrConstSplat(N1);
@@ -8761,7 +8853,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
ConstantSDNode *N1C = isConstOrConstSplat(N1);
@@ -8774,8 +8866,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return NewSel;
// if (srl x, c) is known to be zero, return 0
- if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
- APInt::getAllOnesValue(OpSizeInBits)))
+ if (N1C &&
+ DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
return DAG.getConstant(0, SDLoc(N), VT);
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
@@ -9357,27 +9449,27 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
// is also a target-independent combine here in DAGCombiner in the other
// direction for (select Cond, -1, 0) when the condition is not i1.
if (CondVT == MVT::i1 && !LegalOperations) {
- if (C1->isNullValue() && C2->isOne()) {
+ if (C1->isZero() && C2->isOne()) {
// select Cond, 0, 1 --> zext (!Cond)
SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
if (VT != MVT::i1)
NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
return NotCond;
}
- if (C1->isNullValue() && C2->isAllOnesValue()) {
+ if (C1->isZero() && C2->isAllOnes()) {
// select Cond, 0, -1 --> sext (!Cond)
SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
if (VT != MVT::i1)
NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
return NotCond;
}
- if (C1->isOne() && C2->isNullValue()) {
+ if (C1->isOne() && C2->isZero()) {
// select Cond, 1, 0 --> zext (Cond)
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
return Cond;
}
- if (C1->isAllOnesValue() && C2->isNullValue()) {
+ if (C1->isAllOnes() && C2->isZero()) {
// select Cond, -1, 0 --> sext (Cond)
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
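The i1 select-of-constants folds above, modeled in scalar form (sketch; bool stands in for the i1 condition, and unary minus models the sign extension):

  #include <cstdint>

  constexpr int32_t selZeroOne(bool C)     { return (int32_t)!C; }  // select C, 0, 1  -> zext(!C)
  constexpr int32_t selZeroAllOnes(bool C) { return -(int32_t)!C; } // select C, 0, -1 -> sext(!C)
  static_assert(selZeroOne(true) == 0 && selZeroOne(false) == 1);
  static_assert(selZeroAllOnes(true) == 0 && selZeroAllOnes(false) == -1);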
@@ -9405,7 +9497,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
}
// select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
- if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
+ if (C1Val.isPowerOf2() && C2Val.isZero()) {
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
@@ -9433,7 +9525,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
TargetLowering::ZeroOrOneBooleanContent &&
TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
TargetLowering::ZeroOrOneBooleanContent &&
- C1->isNullValue() && C2->isOne()) {
+ C1->isZero() && C2->isOne()) {
SDValue NotCond =
DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
if (VT.bitsEq(CondVT))
@@ -9478,6 +9570,64 @@ static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
+ return SDValue();
+
+ SDValue Cond0 = N0.getOperand(0);
+ SDValue Cond1 = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ if (VT != Cond0.getValueType())
+ return SDValue();
+
+ // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
+ // compare is inverted from that pattern ("Cond0 s> -1").
+ if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
+ ; // This is the pattern we are looking for.
+ else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
+ std::swap(N1, N2);
+ else
+ return SDValue();
+
+ // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
+ if (isNullOrNullSplat(N2)) {
+ SDLoc DL(N);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
+ return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
+ }
+
+ // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
+ if (isAllOnesOrAllOnesSplat(N1)) {
+ SDLoc DL(N);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
+ return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
+ }
+
+ // If we have to invert the sign bit mask, only do that transform if the
+ // target has a bitwise 'and not' instruction (the invert is free).
+ // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
+ SDLoc DL(N);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
+ SDValue Not = DAG.getNOT(DL, Sra, VT);
+ return DAG.getNode(ISD::AND, DL, VT, Not, N2);
+ }
+
+ // TODO: There's another pattern in this family, but it may require
+ // implementing hasOrNot() to check for profitability:
+ // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -9702,8 +9852,8 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
"same value. This should have been addressed before this function.");
return DAG.getNode(
ISD::CONCAT_VECTORS, DL, VT,
- BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
- TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
+ BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
+ TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
}
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
@@ -10168,6 +10318,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (SDValue V = foldVSelectOfConstants(N))
return V;
+ if (hasOperation(ISD::SRA, VT))
+ if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
+ return V;
+
return SDValue();
}
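A scalar model of the new sign-bit splat-mask folds (illustration; the vector fold applies this lane-wise, and 32-bit lanes with arithmetic shift are assumed):

  #include <cstdint>

  constexpr int32_t selAnd(int32_t Cond0, int32_t N1) {
    return (Cond0 >> 31) & N1;  // (Cond0 s< 0) ? N1 : 0
  }
  constexpr int32_t selOr(int32_t Cond0, int32_t N2) {
    return (Cond0 >> 31) | N2;  // (Cond0 s< 0) ? -1 : N2
  }
  static_assert(selAnd(-5, 42) == 42 && selAnd(5, 42) == 0);
  static_assert(selOr(-5, 42) == -1 && selOr(5, 42) == 42);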
@@ -10189,7 +10343,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
AddToWorklist(SCC.getNode());
if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
- if (!SCCC->isNullValue())
+ if (!SCCC->isZero())
return N2; // cond always true -> true val
else
return N3; // cond always false -> false val
@@ -10247,13 +10401,13 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
// Is 'X Cond C' always true or false?
auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
- bool False = (Cond == ISD::SETULT && C->isNullValue()) ||
+ bool False = (Cond == ISD::SETULT && C->isZero()) ||
(Cond == ISD::SETLT && C->isMinSignedValue()) ||
- (Cond == ISD::SETUGT && C->isAllOnesValue()) ||
+ (Cond == ISD::SETUGT && C->isAllOnes()) ||
(Cond == ISD::SETGT && C->isMaxSignedValue());
- bool True = (Cond == ISD::SETULE && C->isAllOnesValue()) ||
+ bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
(Cond == ISD::SETLE && C->isMaxSignedValue()) ||
- (Cond == ISD::SETUGE && C->isNullValue()) ||
+ (Cond == ISD::SETUGE && C->isZero()) ||
(Cond == ISD::SETGE && C->isMinSignedValue());
return True || False;
};
@@ -10862,7 +11016,7 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
return SDValue();
- if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
+ if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
return SDValue();
if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
@@ -11256,7 +11410,7 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
Known = DAG.computeKnownBits(Op);
- return (Known.Zero | 1).isAllOnesValue();
+ return (Known.Zero | 1).isAllOnes();
}
/// Given an extending node with a pop-count operand, if the target does not
@@ -12015,7 +12169,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
// If the input is already sign extended, just drop the extension.
- if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
+ if (ExtVTBits >= DAG.ComputeMinSignedBits(N0))
return N0;
// fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
@@ -12031,8 +12185,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
unsigned N00Bits = N00.getScalarValueSizeInBits();
- if ((N00Bits <= ExtVTBits ||
- (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
+ if ((N00Bits <= ExtVTBits || DAG.ComputeMinSignedBits(N00) <= ExtVTBits) &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
}
@@ -12051,8 +12204,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
if ((N00Bits == ExtVTBits ||
(!IsZext && (N00Bits < ExtVTBits ||
- (N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) <
- ExtVTBits))) &&
+ DAG.ComputeMinSignedBits(N00) <= ExtVTBits))) &&
(!LegalOperations ||
TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
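ComputeMinSignedBits(V), used in the two hunks above, is BitWidth - ComputeNumSignBits(V) + 1: the width of the smallest signed integer that can hold V. A 32-bit scalar model (sketch; needs C++20 for <bit>):

  #include <bit>
  #include <cstdint>

  constexpr unsigned minSignedBits32(int32_t V) {
    unsigned SignBits = V < 0 ? std::countl_one((uint32_t)V)
                              : std::countl_zero((uint32_t)V);
    return 32 - SignBits + 1;
  }
  static_assert(minSignedBits32(0) == 1);    // "0"
  static_assert(minSignedBits32(127) == 8);  // "01111111"
  static_assert(minSignedBits32(-128) == 8); // "10000000"

So 'ExtVTBits >= DAG.ComputeMinSignedBits(N0)' is the old 'ComputeNumSignBits(N0) >= VTBits - ExtVTBits + 1' test, phrased directly.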
@@ -12289,7 +12441,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue Amt = N0.getOperand(1);
KnownBits Known = DAG.computeKnownBits(Amt);
unsigned Size = VT.getScalarSizeInBits();
- if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
+ if (Known.countMaxActiveBits() <= Log2_32(Size)) {
SDLoc SL(N);
EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
@@ -12537,8 +12689,8 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
assert(N->getOpcode() == ISD::BUILD_PAIR);
- LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
- LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+ auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+ auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
// A BUILD_PAIR always has the least significant part in elt 0 and the
// most significant part in elt 1. So when combining into one large load, we
@@ -12546,22 +12698,20 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
if (DAG.getDataLayout().isBigEndian())
std::swap(LD1, LD2);
- if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
+ !LD1->hasOneUse() || !LD2->hasOneUse() ||
LD1->getAddressSpace() != LD2->getAddressSpace())
return SDValue();
+
+ bool LD1Fast = false;
EVT LD1VT = LD1->getValueType(0);
unsigned LD1Bytes = LD1VT.getStoreSize();
- if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
- DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
- Align Alignment = LD1->getAlign();
- Align NewAlign = DAG.getDataLayout().getABITypeAlign(
- VT.getTypeForEVT(*DAG.getContext()));
-
- if (NewAlign <= Alignment &&
- (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
- return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
- LD1->getPointerInfo(), Alignment);
- }
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
+ DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
+ return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
+ LD1->getPointerInfo(), LD1->getAlign());
return SDValue();
}
@@ -12937,69 +13087,45 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
}
- SDLoc DL(BV);
-
// Okay, we know the src/dst types are both integers of differing types.
- // Handling growing first.
assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
- if (SrcBitSize < DstBitSize) {
- unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0, e = BV->getNumOperands(); i != e;
- i += NumInputsPerOutput) {
- bool isLE = DAG.getDataLayout().isLittleEndian();
- APInt NewBits = APInt(DstBitSize, 0);
- bool EltIsUndef = true;
- for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
- // Shift the previously computed bits over.
- NewBits <<= SrcBitSize;
- SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
- if (Op.isUndef()) continue;
- EltIsUndef = false;
-
- NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
- zextOrTrunc(SrcBitSize).zext(DstBitSize);
- }
+ // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
+ // BuildVectorSDNode?
+ auto *BVN = cast<BuildVectorSDNode>(BV);
- if (EltIsUndef)
- Ops.push_back(DAG.getUNDEF(DstEltVT));
- else
- Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
- }
-
- EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
- return DAG.getBuildVector(VT, DL, Ops);
- }
+ // Extract the constant raw bit data.
+ BitVector UndefElements;
+ SmallVector<APInt> RawBits;
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
+ if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
+ return SDValue();
- // Finally, this must be the case where we are shrinking elements: each input
- // turns into multiple outputs.
- unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
- EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
- NumOutputsPerInput*BV->getNumOperands());
+ SDLoc DL(BV);
SmallVector<SDValue, 8> Ops;
+ for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
+ if (UndefElements[I])
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ else
+ Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
+ }
- for (const SDValue &Op : BV->op_values()) {
- if (Op.isUndef()) {
- Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
- continue;
- }
-
- APInt OpVal = cast<ConstantSDNode>(Op)->
- getAPIntValue().zextOrTrunc(SrcBitSize);
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
+ return DAG.getBuildVector(VT, DL, Ops);
+}
- for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
- APInt ThisVal = OpVal.trunc(DstBitSize);
- Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
- OpVal.lshrInPlace(DstBitSize);
- }
+// Returns true if floating point contraction is allowed on the FMUL-SDValue
+// `N`
+static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
+ assert(N.getOpcode() == ISD::FMUL);
- // For big endian targets, swap the order of the pieces of each element.
- if (DAG.getDataLayout().isBigEndian())
- std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
- }
+ return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
+ N->getFlags().hasAllowContract();
+}
- return DAG.getBuildVector(VT, DL, Ops);
+// Returns true if `N` can assume no infinities involved in its computation.
+static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
+ return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs();
}
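The rewritten ConstantFoldBITCASTofBUILD_VECTOR above delegates the regrouping of element bits to BuildVectorSDNode::getConstantRawBits instead of open-coding the grow and shrink loops. The underlying repacking, shown for the growing little-endian case of two i32 elements into one i64 (stand-alone illustration):

  #include <cstdint>

  constexpr uint64_t repackLE(uint32_t Elt0, uint32_t Elt1) {
    return ((uint64_t)Elt1 << 32) | Elt0;  // elt 0 is least significant on LE
  }
  static_assert(repackLE(0x11223344u, 0xAABBCCDDu) == 0xAABBCCDD11223344ull);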
/// Try to perform FMA combining on a given FADD node.
@@ -13038,6 +13164,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+ auto isFusedOp = [&](SDValue N) {
+ unsigned Opcode = N.getOpcode();
+ return Opcode == ISD::FMA || Opcode == ISD::FMAD;
+ };
+
// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.
auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
@@ -13069,12 +13200,12 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
// This requires reassociation because it changes the order of operations.
SDValue FMA, E;
- if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&
+ if (CanReassociate && isFusedOp(N0) &&
N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
N0.getOperand(2).hasOneUse()) {
FMA = N0;
E = N1;
- } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&
+ } else if (CanReassociate && isFusedOp(N1) &&
N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
N1.getOperand(2).hasOneUse()) {
FMA = N1;
@@ -13130,7 +13261,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
Z));
};
- if (N0.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N0)) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
@@ -13160,7 +13291,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
};
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N00)) {
SDValue N002 = N00.getOperand(2);
if (isContractableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
@@ -13174,7 +13305,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fold (fadd x, (fma y, z, (fpext (fmul u, v)))
// -> (fma y, z, (fma (fpext u), (fpext v), x))
- if (N1.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N1)) {
SDValue N12 = N1.getOperand(2);
if (N12.getOpcode() == ISD::FP_EXTEND) {
SDValue N120 = N12.getOperand(0);
@@ -13195,7 +13326,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// interesting for all targets, especially GPUs.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N10)) {
SDValue N102 = N10.getOperand(2);
if (isContractableFMUL(N102) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
@@ -13391,12 +13522,17 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
return isContractableFMUL(N) && isReassociable(N.getNode());
};
+ auto isFusedOp = [&](SDValue N) {
+ unsigned Opcode = N.getOpcode();
+ return Opcode == ISD::FMA || Opcode == ISD::FMAD;
+ };
+
// More folding opportunities when target permits.
if (Aggressive && isReassociable(N)) {
bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))
- if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
+ if (CanFuse && isFusedOp(N0) &&
isContractableAndReassociableFMUL(N0.getOperand(2)) &&
N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
@@ -13409,7 +13545,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
- if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
+ if (CanFuse && isFusedOp(N1) &&
isContractableAndReassociableFMUL(N1.getOperand(2)) &&
N1->hasOneUse() && NoSignedZero) {
SDValue N20 = N1.getOperand(2).getOperand(0);
@@ -13423,8 +13559,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
- if (N0.getOpcode() == PreferredFusedOpcode &&
- N0->hasOneUse()) {
+ if (isFusedOp(N0) && N0->hasOneUse()) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
@@ -13450,7 +13585,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// interesting for all targets, especially GPUs.
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N00)) {
SDValue N002 = N00.getOperand(2);
if (isContractableAndReassociableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
@@ -13470,8 +13605,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fpext (fmul u, v))))
// -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
- if (N1.getOpcode() == PreferredFusedOpcode &&
- N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
+ if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
N1->hasOneUse()) {
SDValue N120 = N1.getOperand(2).getOperand(0);
if (isContractableAndReassociableFMUL(N120) &&
@@ -13495,8 +13629,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
- if (N1.getOpcode() == ISD::FP_EXTEND &&
- N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
+ if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {
SDValue CvtSrc = N1.getOperand(0);
SDValue N100 = CvtSrc.getOperand(0);
SDValue N101 = CvtSrc.getOperand(1);
@@ -13537,12 +13670,13 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
// The transforms below are incorrect when x == 0 and y == inf, because the
// intermediate multiplication produces a nan.
- if (!Options.NoInfsFPMath)
+ SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
+ if (!hasNoInfs(Options, FAdd))
return SDValue();
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
+ isContractableFMUL(Options, SDValue(N, 0)) &&
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
@@ -13632,7 +13766,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (fadd c1, c2) -> c1 + c2
@@ -13840,7 +13974,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (fsub c1, c2) -> c1-c2
@@ -13925,7 +14059,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
// This just handles C1 * C2 for vectors. Other vector folds are below.
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
}
@@ -13970,10 +14104,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (N1CFP && N1CFP->isExactlyValue(+2.0))
return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
- // fold (fmul X, -1.0) -> (fneg X)
- if (N1CFP && N1CFP->isExactlyValue(-1.0))
- if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, DL, VT, N0);
+ // fold (fmul X, -1.0) -> (fsub -0.0, X)
+ if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
+ return DAG.getNode(ISD::FSUB, DL, VT,
+ DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
+ }
+ }
// -N0 * -N1 --> N0 * N1
TargetLowering::NegatibleCost CostN0 =
@@ -14259,7 +14396,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (fdiv c1, c2) -> c1/c2
@@ -16244,11 +16381,12 @@ struct LoadedSlice {
return false;
// Check if it will be merged with the load.
- // 1. Check the alignment constraint.
- Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(
- ResVT.getTypeForEVT(*DAG->getContext()));
-
- if (RequiredAlignment > getAlign())
+ // 1. Check the alignment / fast memory access constraint.
+ bool IsFast = false;
+ if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
+ Origin->getAddressSpace(), getAlign(),
+ Origin->getMemOperand()->getFlags(), &IsFast) ||
+ !IsFast)
return false;
// 2. Check that the load is a legal operation for that type.
@@ -16269,7 +16407,7 @@ struct LoadedSlice {
/// \p UsedBits looks like 0..0 1..1 0..0.
static bool areUsedBitsDense(const APInt &UsedBits) {
// If all the bits are one, this is dense!
- if (UsedBits.isAllOnesValue())
+ if (UsedBits.isAllOnes())
return true;
// Get rid of the unused bits on the right.
@@ -16278,7 +16416,7 @@ static bool areUsedBitsDense(const APInt &UsedBits) {
if (NarrowedUsedBits.countLeadingZeros())
NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
// Check that the chunk of bits is completely used.
- return NarrowedUsedBits.isAllOnesValue();
+ return NarrowedUsedBits.isAllOnes();
}
/// Check whether or not \p First and \p Second are next to each other
@@ -16696,8 +16834,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
unsigned BitWidth = N1.getValueSizeInBits();
APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
if (Opc == ISD::AND)
- Imm ^= APInt::getAllOnesValue(BitWidth);
- if (Imm == 0 || Imm.isAllOnesValue())
+ Imm ^= APInt::getAllOnes(BitWidth);
+ if (Imm == 0 || Imm.isAllOnes())
return SDValue();
unsigned ShAmt = Imm.countTrailingZeros();
unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
@@ -16724,16 +16862,19 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if ((Imm & Mask) == Imm) {
APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
if (Opc == ISD::AND)
- NewImm ^= APInt::getAllOnesValue(NewBW);
+ NewImm ^= APInt::getAllOnes(NewBW);
uint64_t PtrOff = ShAmt / 8;
// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.
if (DAG.getDataLayout().isBigEndian())
PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
+ bool IsFast = false;
Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
- Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
- if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
+ LD->getAddressSpace(), NewAlign,
+ LD->getMemOperand()->getFlags(), &IsFast) ||
+ !IsFast)
return SDValue();
SDValue NewPtr =
@@ -16787,27 +16928,26 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
if (VTSize.isScalable())
return SDValue();
+ bool FastLD = false, FastST = false;
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
!TLI.isOperationLegal(ISD::STORE, IntVT) ||
!TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
- !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
- return SDValue();
-
- Align LDAlign = LD->getAlign();
- Align STAlign = ST->getAlign();
- Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
- Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);
- if (LDAlign < ABIAlign || STAlign < ABIAlign)
+ !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
+ *LD->getMemOperand(), &FastLD) ||
+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
+ *ST->getMemOperand(), &FastST) ||
+ !FastLD || !FastST)
return SDValue();
SDValue NewLD =
DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
- LD->getPointerInfo(), LDAlign);
+ LD->getPointerInfo(), LD->getAlign());
SDValue NewST =
DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
- ST->getPointerInfo(), STAlign);
+ ST->getPointerInfo(), ST->getAlign());
AddToWorklist(NewLD.getNode());
AddToWorklist(NewST.getNode());
@@ -16838,8 +16978,10 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
SDValue &ConstNode) {
APInt Val;
- // If the add only has one use, this would be OK to do.
- if (AddNode.getNode()->hasOneUse())
+ // If the add only has one use, and the target thinks the folding is
+ // profitable or does not lead to worse code, this would be OK to do.
+ if (AddNode.getNode()->hasOneUse() &&
+ TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
return true;
// Walk all the users of the constant with which we're multiplying.
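The transform this hook guards distributes a constant multiply over an add so that c1 * c2 folds to a single constant; the change lets the target veto it when the fused form is cheaper. Scalar sketch of the rewrite (constants chosen arbitrarily):

  #include <cstdint>

  constexpr uint32_t fused(uint32_t X)  { return (X + 3u) * 5u; }
  constexpr uint32_t folded(uint32_t X) { return X * 5u + 15u; }  // 3*5 pre-folded
  static_assert(fused(7u) == folded(7u));
  static_assert(fused(0xFFFFFFFFu) == folded(0xFFFFFFFFu));       // wraps consistently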
@@ -16931,6 +17073,22 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
unsigned SizeInBits = NumStores * ElementSizeBits;
unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+ Optional<MachineMemOperand::Flags> Flags;
+ AAMDNodes AAInfo;
+ for (unsigned I = 0; I != NumStores; ++I) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
+ if (!Flags) {
+ Flags = St->getMemOperand()->getFlags();
+ AAInfo = St->getAAInfo();
+ continue;
+ }
+ // Skip merging if there's an inconsistent flag.
+ if (Flags != St->getMemOperand()->getFlags())
+ return false;
+ // Concatenate AA metadata.
+ AAInfo = AAInfo.concat(St->getAAInfo());
+ }
+
EVT StoreTy;
if (UseVector) {
unsigned Elts = NumStores * NumMemElts;
@@ -17048,9 +17206,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
// make sure we use trunc store if it's necessary to be legal.
SDValue NewStore;
if (!UseTrunc) {
- NewStore =
- DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), FirstInChain->getAlign());
+ NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ FirstInChain->getAlign(), Flags.getValue(), AAInfo);
} else { // Must be realized as a trunc store
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
@@ -17062,7 +17220,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
NewStore = DAG.getTruncStore(
NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
- FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
+ FirstInChain->getAlign(), Flags.getValue(), AAInfo);
}
// Replace all merged stores with the new store.
@@ -17359,7 +17517,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(
SDValue StoredVal = ST->getValue();
bool IsElementZero = false;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
- IsElementZero = C->isNullValue();
+ IsElementZero = C->isZero();
else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
IsElementZero = C->getConstantFPValue()->isNullValue();
if (IsElementZero) {
@@ -17378,7 +17536,8 @@ bool DAGCombiner::tryStoreMergeOfConstants(
break;
if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
+ DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast) {
@@ -17390,7 +17549,8 @@ bool DAGCombiner::tryStoreMergeOfConstants(
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
+ DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast) {
@@ -17409,7 +17569,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(
unsigned Elts = (i + 1) * NumMemElts;
EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
- TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, Ty,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast)
@@ -17485,7 +17645,8 @@ bool DAGCombiner::tryStoreMergeOfExtracts(
if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
break;
- if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ if (TLI.isTypeLegal(Ty) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, Ty,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast)
@@ -17633,8 +17794,13 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
bool IsFastSt = false;
bool IsFastLd = false;
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ // Don't try vector types if we need a rotate. We may still fail the
+ // legality checks for the integer type, but we can't handle the rotate
+ // case with vectors.
+ // FIXME: We could use a shuffle in place of the rotate.
+ if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
+ DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
@@ -17648,7 +17814,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
StoreTy = EVT::getIntegerVT(Context, SizeInBits);
if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
+ DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
@@ -17662,7 +17829,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
+ DAG.getMachineFunction()) &&
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
@@ -18214,7 +18382,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
case ISD::LIFETIME_END:
// We can forward past any lifetime start/end that can be proven not to
// alias the node.
- if (!isAlias(Chain.getNode(), N))
+ if (!mayAlias(Chain.getNode(), N))
Chains.push_back(Chain.getOperand(0));
break;
case ISD::STORE: {
@@ -18592,32 +18760,35 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
if (!VecEltVT.isByteSized())
return SDValue();
- Align Alignment = OriginalLoad->getAlign();
- Align NewAlign = DAG.getDataLayout().getABITypeAlign(
- VecEltVT.getTypeForEVT(*DAG.getContext()));
-
- if (NewAlign > Alignment ||
- !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
- return SDValue();
-
- ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
- ISD::NON_EXTLOAD : ISD::EXTLOAD;
- if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
+ ISD::LoadExtType ExtTy =
+ ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
+ if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
+ !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
return SDValue();
- Alignment = NewAlign;
-
+ Align Alignment = OriginalLoad->getAlign();
MachinePointerInfo MPI;
SDLoc DL(EVE);
if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
int Elt = ConstEltNo->getZExtValue();
unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
+ Alignment = commonAlignment(Alignment, PtrOff);
} else {
// Discard the pointer info except the address space because the memory
// operand can't represent this new access since the offset is variable.
MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
+ Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
}
+
+ bool IsFast = false;
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
+ OriginalLoad->getAddressSpace(), Alignment,
+ OriginalLoad->getMemOperand()->getFlags(),
+ &IsFast) ||
+ !IsFast)
+ return SDValue();
+
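// Illustrative sketch (not part of this patch), assuming commonAlignment's
// usual semantics: the narrowed load's alignment is the largest power of two
// dividing both the original alignment and the byte offset.
#include <cstdint>
static uint64_t commonAlignmentSketch(uint64_t A, uint64_t Off) {
  uint64_t V = A | Off;
  return V & -V; // lowest set bit of the combined value
}
// e.g. extracting element 1 (PtrOff = 4) of a 16-byte aligned v4i32 load
// yields a 4-byte aligned scalar load: commonAlignmentSketch(16, 4) == 4.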
SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
InVecVT, EltNo);
@@ -18863,7 +19034,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
Use->getOperand(0) == VecOp &&
isa<ConstantSDNode>(Use->getOperand(1));
})) {
- APInt DemandedElts = APInt::getNullValue(NumElts);
+ APInt DemandedElts = APInt::getZero(NumElts);
for (SDNode *Use : VecOp->uses()) {
auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
if (CstElt->getAPIntValue().ult(NumElts))
@@ -18876,7 +19047,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
AddToWorklist(N);
return SDValue(N, 0);
}
- APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
+ APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
// We simplified the vector operand of this extract element. If this
// extract is not dead, visit it again so it is folded properly.
@@ -19671,8 +19842,10 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
// Make sure the first element matches
// (zext (extract_vector_elt X, C))
+ // The first element's extract index (Offset) must be a constant multiple
+ // of the result type's known-minimum element count.
int64_t Offset = checkElem(Op0);
- if (Offset < 0)
+ if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
return SDValue();
unsigned NumElems = N->getNumOperands();
@@ -19843,6 +20016,44 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
}
+// Attempt to merge nested concat_vectors/undefs.
+// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
+// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
+static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
+ SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+
+ // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
+ EVT SubVT;
+ SDValue FirstConcat;
+ for (const SDValue &Op : N->ops()) {
+ if (Op.isUndef())
+ continue;
+ if (Op.getOpcode() != ISD::CONCAT_VECTORS)
+ return SDValue();
+ if (!FirstConcat) {
+ SubVT = Op.getOperand(0).getValueType();
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
+ return SDValue();
+ FirstConcat = Op;
+ continue;
+ }
+ if (SubVT != Op.getOperand(0).getValueType())
+ return SDValue();
+ }
+ assert(FirstConcat && "Concat of all-undefs found");
+
+ SmallVector<SDValue> ConcatOps;
+ for (const SDValue &Op : N->ops()) {
+ if (Op.isUndef()) {
+ ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
+ continue;
+ }
+ ConcatOps.append(Op->op_begin(), Op->op_end());
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
+}
+
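// Illustrative sketch (not part of this patch) of the operand expansion
// above: each undef operand of the outer concat widens to one undef per
// operand of the first inner concat.
#include <string>
#include <vector>
static std::vector<std::string>
flattenConcatSketch(const std::vector<std::vector<std::string>> &Ops,
                    size_t InnerOps) {
  std::vector<std::string> Out;
  for (const auto &Op : Ops) {
    if (Op.empty()) // an empty entry stands in for an undef operand
      Out.insert(Out.end(), InnerOps, "undef");
    else
      Out.insert(Out.end(), Op.begin(), Op.end());
  }
  return Out; // {{"x","y"}, {}} with InnerOps = 2 -> {"x","y","undef","undef"}
}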
// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
@@ -20102,13 +20313,19 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
}
// Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
+ // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
if (SDValue V = combineConcatVectorOfScalars(N, DAG))
return V;
- // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
- if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
+ // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) into one CONCAT_VECTORS.
+ if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
+ return V;
+
+ // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
return V;
+ }
if (SDValue V = combineConcatVectorOfCasts(N, DAG))
return V;
@@ -20350,9 +20567,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
return SDValue();
auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
- auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
- if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
- !ExtIdx)
+ if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
return SDValue();
// Allow targets to opt-out.
@@ -20362,7 +20577,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
if (!VT.isByteSized())
return SDValue();
- unsigned Index = ExtIdx->getZExtValue();
+ unsigned Index = Extract->getConstantOperandVal(1);
unsigned NumElts = VT.getVectorMinNumElements();
// The definition of EXTRACT_SUBVECTOR states that the index must be a
@@ -20491,7 +20706,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// If the concatenated source types match this extract, it's a direct
// simplification:
// extract_subvec (concat V1, V2, ...), i --> Vi
- if (ConcatSrcNumElts == ExtNumElts)
+ if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
return V.getOperand(ConcatOpIdx);
// If the concatenated source vectors are a multiple length of this extract,
@@ -20499,7 +20714,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// concat operand. Example:
// v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
// v2i8 extract_subvec v8i8 Y, 6
- if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
+ if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
+ ConcatSrcNumElts % ExtNumElts == 0) {
SDLoc DL(N);
unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
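// Illustrative check (not part of this patch) of the index remap above,
// using the v2i8-from-v16i8 example in the comment:
#include <cassert>
static void checkExtractOfConcatRemap() {
  const unsigned ConcatSrcNumElts = 8, ExtNumElts = 2, ExtIdx = 14;
  const unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;             // 1 -> Y
  const unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts; // 6
  assert(ConcatOpIdx == 1 && NewExtIdx == 6);
  assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts && "within one operand");
}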
@@ -21134,15 +21350,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
// Canonicalize shuffle v, v -> v, undef
- if (N0 == N1) {
- SmallVector<int, 8> NewMask;
- for (unsigned i = 0; i != NumElts; ++i) {
- int Idx = SVN->getMaskElt(i);
- if (Idx >= (int)NumElts) Idx -= NumElts;
- NewMask.push_back(Idx);
- }
- return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
- }
+ if (N0 == N1)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
+ createUnaryMask(SVN->getMask(), NumElts));
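// Illustrative sketch (not part of this patch) of the unary mask now built
// via createUnaryMask, matching the removed loop: indices that referred to
// the second (identical) operand are folded onto the first, and undef (-1)
// entries pass through unchanged.
#include <vector>
static std::vector<int> unaryMaskSketch(const std::vector<int> &Mask,
                                        int NumElts) {
  std::vector<int> Out;
  for (int Idx : Mask)
    Out.push_back(Idx >= NumElts ? Idx - NumElts : Idx);
  return Out; // e.g. <0,5,2,7> with NumElts = 4 -> <0,1,2,3>
}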
// Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
if (N0.isUndef())
@@ -21293,6 +21503,70 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
}
+ // See if we can replace a shuffle with an insert_subvector.
+ // e.g. v2i32 into v8i32:
+ // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
+ // --> insert_subvector(lhs,rhs1,4).
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
+ TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
+ auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
+ // Ensure RHS subvectors are legal.
+ assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
+ EVT SubVT = RHS.getOperand(0).getValueType();
+ int NumSubVecs = RHS.getNumOperands();
+ int NumSubElts = SubVT.getVectorNumElements();
+ assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
+ if (!TLI.isTypeLegal(SubVT))
+ return SDValue();
+
+ // Don't bother if we have a unary shuffle (matches undef + LHS elts).
+ if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
+ return SDValue();
+
+ // Search each NumSubElts-aligned span for an inserted RHS sequence.
+ // TODO: Can we avoid nested loops to increase performance?
+ SmallVector<int> InsertionMask(NumElts);
+ for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
+ for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
+ // Reset mask to identity.
+ std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
+
+ // Add subvector insertion.
+ std::iota(InsertionMask.begin() + SubIdx,
+ InsertionMask.begin() + SubIdx + NumSubElts,
+ NumElts + (SubVec * NumSubElts));
+
+ // See if the shuffle mask matches the reference insertion mask.
+ bool MatchingShuffle = true;
+ for (int i = 0; i != (int)NumElts; ++i) {
+ int ExpectIdx = InsertionMask[i];
+ int ActualIdx = Mask[i];
+ if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
+ MatchingShuffle = false;
+ break;
+ }
+ }
+
+ if (MatchingShuffle)
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
+ RHS.getOperand(SubVec),
+ DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
+ }
+ }
+ return SDValue();
+ };
+ ArrayRef<int> Mask = SVN->getMask();
+ if (N1.getOpcode() == ISD::CONCAT_VECTORS)
+ if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
+ return InsertN1;
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
+ SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
+ ShuffleVectorSDNode::commuteMask(CommuteMask);
+ if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
+ return InsertN0;
+ }
+ }
+
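// Illustrative sketch (not part of this patch) of the reference insertion
// mask built above: the identity mask, with one NumSubElts-wide span
// renumbered into the RHS (elements NumElts and up).
#include <numeric>
#include <vector>
static std::vector<int> insertionMaskSketch(int NumElts, int NumSubElts,
                                            int SubVec, int SubIdx) {
  std::vector<int> Mask(NumElts);
  std::iota(Mask.begin(), Mask.end(), 0); // identity
  std::iota(Mask.begin() + SubIdx, Mask.begin() + SubIdx + NumSubElts,
            NumElts + SubVec * NumSubElts); // inserted subvector span
  return Mask; // (8, 2, /*SubVec=*/1, /*SubIdx=*/4) -> <0,1,2,3,10,11,6,7>,
               // exactly the v2i32-into-v8i32 example above
}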
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
@@ -21862,6 +22136,40 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVPOp(SDNode *N) {
+ // VP operations in which all vector elements are disabled - either by
+ // determining that the mask is all false or that the EVL is 0 - can be
+ // eliminated.
+ bool AreAllEltsDisabled = false;
+ if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
+ AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
+ if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
+ AreAllEltsDisabled |=
+ ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
+
+ // This is the only generic VP combine we support for now.
+ if (!AreAllEltsDisabled)
+ return SDValue();
+
+ // Binary operations can be replaced by UNDEF.
+ if (ISD::isVPBinaryOp(N->getOpcode()))
+ return DAG.getUNDEF(N->getValueType(0));
+
+ // VP Memory operations can be replaced by either the chain (stores) or the
+ // chain + undef (loads).
+ if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
+ if (MemSD->writeMem())
+ return MemSD->getChain();
+ return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
+ }
+
+ // Reduction operations return the start operand when no elements are active.
+ if (ISD::isVPReduction(N->getOpcode()))
+ return N->getOperand(0);
+
+ return SDValue();
+}
+
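// Examples of the eliminations above (illustrative pseudo-ops, not part of
// this patch; the mnemonic names are informal):
//   vp.add(x, y, mask, /*evl=*/0)             --> undef
//   vp.store(v, ptr, all-zeros mask, evl)     --> the store's chain
//   vp.reduce.add(start, v, all-zeros, evl)   --> start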
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
@@ -21918,7 +22226,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
else
Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
- if (Bits.isAllOnesValue())
+ if (Bits.isAllOnes())
Indices.push_back(i);
else if (Bits == 0)
Indices.push_back(i + NumSubElts);
@@ -21953,7 +22261,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
/// If a vector binop is performed on splat values, it may be profitable to
/// extract, scalarize, and insert/splat.
-static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
+static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
+ const SDLoc &DL) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
unsigned Opcode = N->getOpcode();
@@ -21974,7 +22283,6 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
!TLI.isOperationLegalOrCustom(Opcode, EltVT))
return SDValue();
- SDLoc DL(N);
SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
@@ -21998,20 +22306,19 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
}
/// Visit a binary vector operation, like ADD.
-SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
- assert(N->getValueType(0).isVector() &&
- "SimplifyVBinOp only works on vectors!");
+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
+ EVT VT = N->getValueType(0);
+ assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Ops[] = {LHS, RHS};
- EVT VT = N->getValueType(0);
unsigned Opcode = N->getOpcode();
SDNodeFlags Flags = N->getFlags();
// See if we can constant fold the vector operation.
- if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
- Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
+ if (SDValue Fold = DAG.FoldConstantArithmetic(Opcode, SDLoc(LHS),
+ LHS.getValueType(), Ops))
return Fold;
// Move unary shuffles with identical masks after a vector binop:
@@ -22029,7 +22336,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
(LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
- SDLoc DL(N);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
RHS.getOperand(0), Flags);
SDValue UndefV = LHS.getOperand(1);
@@ -22046,7 +22352,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
// binop (splat X), (splat C) --> splat (binop X, C)
- SDLoc DL(N);
SDValue X = Shuf0->getOperand(0);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
@@ -22056,7 +22361,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
// binop (splat C), (splat X) --> splat (binop C, X)
- SDLoc DL(N);
SDValue X = Shuf1->getOperand(0);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
@@ -22080,7 +22384,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
LegalOperations)) {
// (binop undef, undef) may not return undef, so compute that result.
- SDLoc DL(N);
SDValue VecC =
DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
@@ -22107,7 +22410,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
EVT NarrowVT = LHS.getOperand(0).getValueType();
if (NarrowVT == RHS.getOperand(0).getValueType() &&
TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
- SDLoc DL(N);
unsigned NumOperands = LHS.getNumOperands();
SmallVector<SDValue, 4> ConcatOps;
for (unsigned i = 0; i != NumOperands; ++i) {
@@ -22120,7 +22422,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
}
}
- if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
+ if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
return V;
return SDValue();
@@ -22434,15 +22736,23 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
return SDValue();
- if (!N->isOnlyUserOf(N0.getNode()) || !N->isOnlyUserOf(N1.getNode()))
+ // The use checks are intentionally on SDNode because we may be dealing
+ // with opcodes that produce more than one SDValue.
+ // TODO: Do we really need to check N0 (the condition operand of the select)?
+ // But removing that clause could cause an infinite loop...
+ if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
return SDValue();
+ // Binops may include opcodes that return multiple values, so all values
+ // must be created/propagated from the newly created binops below.
+ SDVTList OpVTs = N1->getVTList();
+
// Fold select(cond, binop(x, y), binop(z, y))
// --> binop(select(cond, x, z), y)
if (N1.getOperand(1) == N2.getOperand(1)) {
SDValue NewSel =
DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
- SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, NewSel, N1.getOperand(1));
+ SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
NewBinOp->setFlags(N1->getFlags());
NewBinOp->intersectFlagsWith(N2->getFlags());
return NewBinOp;
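// Illustrative sketch (not part of this patch) of the shape of this fold on
// scalars:
#include <cstdint>
static uint32_t beforeFold(bool C, uint32_t X, uint32_t Y, uint32_t Z) {
  return C ? (X + Y) : (Z + Y); // select(cond, binop(x, y), binop(z, y))
}
static uint32_t afterFold(bool C, uint32_t X, uint32_t Y, uint32_t Z) {
  return (C ? X : Z) + Y;       // binop(select(cond, x, z), y)
}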
@@ -22456,7 +22766,7 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
VT == N2.getOperand(1).getValueType()) {
SDValue NewSel =
DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
- SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, N1.getOperand(0), NewSel);
+ SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
NewBinOp->setFlags(N1->getFlags());
NewBinOp->intersectFlagsWith(N2->getFlags());
return NewBinOp;
@@ -22584,7 +22894,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
// fold select_cc true, x, y -> x
// fold select_cc false, x, y -> y
- return !(SCCC->isNullValue()) ? N2 : N3;
+ return !(SCCC->isZero()) ? N2 : N3;
}
}
@@ -22683,7 +22993,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
// select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
// select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
// select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
- if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
SDValue ValueOnZero = N2;
SDValue Count = N3;
// If the condition is NE instead of E, swap the operands.
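// Illustrative sketch (not part of this patch): the select_cc form on the
// left of these folds is exactly the defined-at-zero cttz/ctlz semantics,
// e.g. for 32-bit cttz (using the GCC/Clang builtin):
#include <cstdint>
static uint32_t cttzDefinedAtZero(uint32_t X) {
  return X == 0 ? 32u : (uint32_t)__builtin_ctz(X);
}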
@@ -22710,6 +23020,20 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
}
}
+ // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
+ // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
+ if (!NotExtCompare && N1C && N2C && N3C &&
+ N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
+ ((N1C->isAllOnes() && CC == ISD::SETGT) ||
+ (N1C->isZero() && CC == ISD::SETLT)) &&
+ !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
+ SDValue ASR = DAG.getNode(
+ ISD::SRA, DL, CmpOpVT, N0,
+ DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
+ return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
+ DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
+ }
+
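// Illustrative check (not part of this patch) of the setgt form above for
// 32-bit values; x >> 31 is an arithmetic shift for signed x (guaranteed
// since C++20, universal in practice):
#include <cassert>
#include <cstdint>
static void checkSelectCCToAshrXor(int32_t X, int32_t C) {
  int32_t Sel = (X > -1) ? C : ~C; // select_cc setgt X, -1, C, ~C
  int32_t Asr = X >> 31;           // ashr X, BW-1
  assert(Sel == (Asr ^ C));        // xor (ashr X, BW-1), C
}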
return SDValue();
}
@@ -22750,7 +23074,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
return SDValue();
// Avoid division by zero.
- if (C->isNullValue())
+ if (C->isZero())
return SDValue();
SmallVector<SDNode *, 8> Built;
@@ -22795,7 +23119,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal, we need to find the zero of the function:
-/// F(X) = A X - 1 [which has a zero at X = 1/A]
+/// F(X) = 1/X - A [which has a zero at X = 1/A]
/// =>
/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
/// does not require additional intermediate precision]
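// Illustrative numeric sketch (not part of this patch) of the iteration,
// showing quadratic convergence from a rough seed:
#include <cstdio>
int main() {
  const double A = 3.0;
  double X = 0.3;                 // rough estimate of 1/A
  for (int I = 0; I < 4; ++I)
    X = X * (2.0 - A * X);        // X_{i+1} = X_i (2 - A X_i)
  std::printf("1/%g ~= %.17g\n", A, X); // ~1/3 to double precision
}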
@@ -22806,9 +23130,10 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
if (LegalDAG)
return SDValue();
- // TODO: Handle half and/or extended types?
+ // TODO: Handle extended types?
EVT VT = Op.getValueType();
- if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
+ VT.getScalarType() != MVT::f64)
return SDValue();
// If estimates are explicitly disabled for this function, we're done.
@@ -22945,9 +23270,10 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
if (LegalDAG)
return SDValue();
- // TODO: Handle half and/or extended types?
+ // TODO: Handle extended types?
EVT VT = Op.getValueType();
- if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
+ VT.getScalarType() != MVT::f64)
return SDValue();
// If estimates are explicitly disabled for this function, we're done.
@@ -22997,7 +23323,7 @@ SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
}
/// Return true if there is any possibility that the two addresses overlap.
-bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
+bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
struct MemUseCharacteristics {
bool IsVolatile;
@@ -23157,7 +23483,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
// TODO: Relax aliasing for unordered atomics (see D66309)
bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
cast<LSBaseSDNode>(C.getNode())->isSimple();
- if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
+ if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
// Look further up the chain.
C = C.getOperand(0);
return true;
@@ -23175,7 +23501,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
case ISD::LIFETIME_END: {
// We can forward past any lifetime start/end that can be proven not to
// alias the memory access.
- if (!isAlias(N, C.getNode())) {
+ if (!mayAlias(N, C.getNode())) {
// Look further up the chain.
C = C.getOperand(0);
return true;