summaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen/SelectionDAG
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2021-11-19 20:06:13 +0000
committerDimitry Andric <dim@FreeBSD.org>2021-11-19 20:06:13 +0000
commitc0981da47d5696fe36474fcf86b4ce03ae3ff818 (patch)
treef42add1021b9f2ac6a69ac7cf6c4499962739a45 /llvm/lib/CodeGen/SelectionDAG
parent344a3780b2e33f6ca763666c380202b18aab72a3 (diff)
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp959
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/FastISel.cpp28
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp23
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp52
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp17
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp505
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp3
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h32
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp10
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp80
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp420
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp4
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp9
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp918
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp29
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp426
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h203
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp35
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp180
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp481
22 files changed, 2850 insertions, 1568 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b104e995019f..ce400ea43f29 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -129,12 +129,12 @@ static cl::opt<unsigned> StoreMergeDependenceLimit(
static cl::opt<bool> EnableReduceLoadOpStoreWidth(
"combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
- cl::desc("DAG cominber enable reducing the width of load/op/store "
+ cl::desc("DAG combiner enable reducing the width of load/op/store "
"sequence"));
static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
"combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
- cl::desc("DAG cominber enable load/<replace bytes>/store with "
+ cl::desc("DAG combiner enable load/<replace bytes>/store with "
"a narrower store"));
namespace {
@@ -319,7 +319,7 @@ namespace {
/// If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
unsigned BitWidth = Op.getScalarValueSizeInBits();
- APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
+ APInt DemandedBits = APInt::getAllOnes(BitWidth);
return SimplifyDemandedBits(Op, DemandedBits);
}
@@ -345,7 +345,7 @@ namespace {
return false;
unsigned NumElts = Op.getValueType().getVectorNumElements();
- APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ APInt DemandedElts = APInt::getAllOnes(NumElts);
return SimplifyDemandedVectorElts(Op, DemandedElts);
}
@@ -436,7 +436,7 @@ namespace {
SDValue visitOR(SDNode *N);
SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitXOR(SDNode *N);
- SDValue SimplifyVBinOp(SDNode *N);
+ SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
@@ -515,6 +515,7 @@ namespace {
SDValue visitFP_TO_FP16(SDNode *N);
SDValue visitFP16_TO_FP(SDNode *N);
SDValue visitVECREDUCE(SDNode *N);
+ SDValue visitVPOp(SDNode *N);
SDValue visitFADDForFMACombine(SDNode *N);
SDValue visitFSUBForFMACombine(SDNode *N);
@@ -615,7 +616,7 @@ namespace {
SmallVectorImpl<SDValue> &Aliases);
/// Return true if there is any possibility that the two addresses overlap.
- bool isAlias(SDNode *Op0, SDNode *Op1) const;
+ bool mayAlias(SDNode *Op0, SDNode *Op1) const;
/// Walk up chain skipping non-aliasing memory nodes, looking for a better
/// chain (aliasing node.)
@@ -1062,21 +1063,22 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
if (N0.getOpcode() != Opc)
return SDValue();
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
- if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {
// Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
- if (SDValue OpNode =
- DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
- return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
+ return DAG.getNode(Opc, DL, VT, N00, OpNode);
return SDValue();
}
if (N0.hasOneUse()) {
// Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
// iff (op x, c1) has one use
- SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
- if (!OpNode.getNode())
- return SDValue();
- return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
+ if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1))
+ return DAG.getNode(Opc, DL, VT, OpNode, N01);
+ return SDValue();
}
}
return SDValue();
@@ -1738,6 +1740,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
+#include "llvm/IR/VPIntrinsics.def"
+ return visitVPOp(N);
}
return SDValue();
}
@@ -2257,7 +2262,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (add x, 0) -> x, vector edition
@@ -2439,9 +2444,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
N0.getOperand(0));
// fold (add (add (xor a, -1), b), 1) -> (sub b, a)
- if (N0.getOpcode() == ISD::ADD ||
- N0.getOpcode() == ISD::UADDO ||
- N0.getOpcode() == ISD::SADDO) {
+ if (N0.getOpcode() == ISD::ADD) {
SDValue A, Xor;
if (isBitwiseNot(N0.getOperand(0))) {
@@ -2783,7 +2786,7 @@ static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
IsFlip = Const->isOne();
break;
case TargetLowering::ZeroOrNegativeOneBooleanContent:
- IsFlip = Const->isAllOnesValue();
+ IsFlip = Const->isAllOnes();
break;
case TargetLowering::UndefinedBooleanContent:
IsFlip = (Const->getAPIntValue() & 0x01) == 1;
@@ -3259,7 +3262,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (sub x, 0) -> x, vector edition
@@ -3317,11 +3320,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
// Convert 0 - abs(x).
- SDValue Result;
if (N1->getOpcode() == ISD::ABS &&
- !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
- TLI.expandABS(N1.getNode(), Result, DAG, true))
- return Result;
+ !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
+ if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
+ return Result;
// Fold neg(splat(neg(x)) -> splat(x)
if (VT.isVector()) {
@@ -3785,7 +3787,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
@@ -3810,18 +3812,18 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
// fold (mul x, 0) -> 0
- if (N1IsConst && ConstValue1.isNullValue())
+ if (N1IsConst && ConstValue1.isZero())
return N1;
// fold (mul x, 1) -> x
- if (N1IsConst && ConstValue1.isOneValue())
+ if (N1IsConst && ConstValue1.isOne())
return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// fold (mul x, -1) -> 0-x
- if (N1IsConst && ConstValue1.isAllOnesValue()) {
+ if (N1IsConst && ConstValue1.isAllOnes()) {
SDLoc DL(N);
return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), N0);
@@ -3839,7 +3841,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
- if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
+ if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
unsigned Log2Val = (-ConstValue1).logBase2();
SDLoc DL(N);
// FIXME: If the input is something that is easily negated (e.g. a
@@ -3968,7 +3970,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
SmallBitVector ClearMask;
ClearMask.reserve(NumElts);
auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
- if (!V || V->isNullValue()) {
+ if (!V || V->isZero()) {
ClearMask.push_back(true);
return true;
}
@@ -4054,9 +4056,7 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
SDValue combined;
- for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
- UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (SDNode *User : Op0.getNode()->uses()) {
if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
User->use_empty())
continue;
@@ -4113,7 +4113,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
// 0 / X -> 0
// 0 % X -> 0
ConstantSDNode *N0C = isConstOrConstSplat(N0);
- if (N0C && N0C->isNullValue())
+ if (N0C && N0C->isZero())
return N0;
// X / X -> 1
@@ -4138,21 +4138,20 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT CCVT = getSetCCResultType(VT);
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
- SDLoc DL(N);
-
// fold (sdiv c1, c2) -> c1/c2
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
return C;
// fold (sdiv X, -1) -> 0-X
- if (N1C && N1C->isAllOnesValue())
+ if (N1C && N1C->isAllOnes())
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
// fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
@@ -4206,11 +4205,11 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// Helper for determining whether a value is a power-2 constant scalar or a
// vector of such elements.
auto IsPowerOfTwo = [](ConstantSDNode *C) {
- if (C->isNullValue() || C->isOpaque())
+ if (C->isZero() || C->isOpaque())
return false;
if (C->getAPIntValue().isPowerOf2())
return true;
- if ((-C->getAPIntValue()).isPowerOf2())
+ if (C->getAPIntValue().isNegatedPowerOf2())
return true;
return false;
};
@@ -4283,21 +4282,20 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT CCVT = getSetCCResultType(VT);
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
- SDLoc DL(N);
-
// fold (udiv c1, c2) -> c1/c2
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
return C;
// fold (udiv X, -1) -> select(X == -1, 1, 0)
- if (N1C && N1C->getAPIntValue().isAllOnesValue())
+ if (N1C && N1C->isAllOnes())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(1, DL, VT),
DAG.getConstant(0, DL, VT));
@@ -4393,7 +4391,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
return C;
// fold (urem X, -1) -> select(X == -1, 0, x)
- if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
+ if (!isSigned && N1C && N1C->isAllOnes())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(0, DL, VT), N0);
@@ -4477,6 +4475,11 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
return C;
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
+
// fold (mulhs x, 0) -> 0
if (isNullConstant(N1))
return N1;
@@ -4529,6 +4532,11 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
return C;
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
+
// fold (mulhu x, 0) -> 0
if (isNullConstant(N1))
return N1;
@@ -4569,6 +4577,12 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
}
}
+ // Simplify the operands using demanded-bits information.
+ // We don't have demanded bits support for MULHU so this just enables constant
+ // folding based on known bits.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -4770,20 +4784,21 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
unsigned Opcode = N->getOpcode();
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold operation with constant operands.
- if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
return C;
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
+ return DAG.getNode(N->getOpcode(), DL, VT, N1, N0);
// Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
// Only do this if the current op isn't legal and the flipped is.
@@ -4799,7 +4814,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
default: llvm_unreachable("Unknown MINMAX opcode");
}
if (TLI.isOperationLegal(AltOpcode, VT))
- return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(AltOpcode, DL, VT, N0, N1);
}
// Simplify the operands using demanded-bits information.
@@ -5135,8 +5150,9 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
return V;
+ // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
- VT.getSizeInBits() <= 64) {
+ VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
// Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
@@ -5608,6 +5624,39 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
return DAG.getZExtOrTrunc(Setcc, DL, VT);
}
+/// For targets that support usubsat, match a bit-hack form of that operation
+/// that ends in 'and' and convert it.
+static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N1.getValueType();
+
+ // Canonicalize SRA as operand 1.
+ if (N0.getOpcode() == ISD::SRA)
+ std::swap(N0, N1);
+
+ // xor/add with SMIN (signmask) are logically equivalent.
+ if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() ||
+ N0.getOperand(0) != N1.getOperand(0))
+ return SDValue();
+
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true);
+ ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);
+ if (!XorC || !XorC->getAPIntValue().isSignMask() ||
+ !SraC || SraC->getAPIntValue() != BitWidth - 1)
+ return SDValue();
+
+ // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
+ // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
+ SDLoc DL(N);
+ SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
+}
+
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5619,17 +5668,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
// fold (and x, 0) -> 0, vector edition
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
// do not return N0, because undef node may exist in N0
- return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
+ return DAG.getConstant(APInt::getZero(N0.getScalarValueSizeInBits()),
SDLoc(N), N0.getValueType());
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
// do not return N1, because undef node may exist in N1
- return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
+ return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
SDLoc(N), N1.getValueType());
// fold (and x, -1) -> x, vector edition
@@ -5680,8 +5729,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// if (and x, c) is known to be zero, return 0
unsigned BitWidth = VT.getScalarSizeInBits();
- if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
- APInt::getAllOnesValue(BitWidth)))
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
return DAG.getConstant(0, SDLoc(N), VT);
if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -5743,7 +5791,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Get the constant (if applicable) the zero'th operand is being ANDed with.
// This can be a pure constant or a vector splat, in which case we treat the
// vector as a scalar and use the splat value.
- APInt Constant = APInt::getNullValue(1);
+ APInt Constant = APInt::getZero(1);
if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
Constant = C->getAPIntValue();
} else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
@@ -5774,7 +5822,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
// multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
if ((SplatBitSize % EltBitWidth) == 0) {
- Constant = APInt::getAllOnesValue(EltBitWidth);
+ Constant = APInt::getAllOnes(EltBitWidth);
for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
}
@@ -5801,7 +5849,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
case ISD::NON_EXTLOAD: B = true; break;
}
- if (B && Constant.isAllOnesValue()) {
+ if (B && Constant.isAllOnes()) {
// If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
// preserve semantics once we get rid of the AND.
SDValue NewLoad(Load, 0);
@@ -5971,6 +6019,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (IsAndZeroExtMask(N0, N1))
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
+ if (hasOperation(ISD::USUBSAT, VT))
+ if (SDValue V = foldAndToUsubsat(N, DAG))
+ return V;
+
return SDValue();
}
@@ -6385,7 +6437,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
// fold (or x, 0) -> x, vector edition
@@ -6926,17 +6978,16 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
// with different shifted sources.
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
- // Must be a legal type. Expanded 'n promoted things won't work with rotates.
EVT VT = LHS.getValueType();
- if (!TLI.isTypeLegal(VT))
- return SDValue();
// The target must have at least one rotate/funnel flavor.
+ // We still try to match rotate by constant pre-legalization.
+ // TODO: Support pre-legalization funnel-shift by constant.
bool HasROTL = hasOperation(ISD::ROTL, VT);
bool HasROTR = hasOperation(ISD::ROTR, VT);
bool HasFSHL = hasOperation(ISD::FSHL, VT);
bool HasFSHR = hasOperation(ISD::FSHR, VT);
- if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
+ if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
return SDValue();
// Check for truncated rotate.
@@ -6989,6 +7040,7 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
if (LHSShift.getOpcode() == RHSShift.getOpcode())
return SDValue(); // Shifts must disagree.
+ // TODO: Support pre-legalization funnel-shift by constant.
bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
if (!IsRotate && !(HasFSHL || HasFSHR))
return SDValue(); // Requires funnel shift support.
@@ -7017,12 +7069,15 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
};
if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
SDValue Res;
- if (IsRotate && (HasROTL || HasROTR))
- Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
- HasROTL ? LHSShiftAmt : RHSShiftAmt);
- else
- Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
- RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
+ if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
+ bool UseROTL = !LegalOperations || HasROTL;
+ Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
+ UseROTL ? LHSShiftAmt : RHSShiftAmt);
+ } else {
+ bool UseFSHL = !LegalOperations || HasFSHL;
+ Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
+ RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
+ }
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
@@ -7046,6 +7101,11 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
return Res;
}
+ // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
+ // shift.
+ if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
+ return SDValue();
+
// If there is a mask here, and we have a variable shift, we can't be sure
// that we're masking out the right stuff.
if (LHSMask.getNode() || RHSMask.getNode())
@@ -7297,7 +7357,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
// TODO: If there is evidence that running this later would help, this
// limitation could be removed. Legality checks may need to be added
// for the created store and optional bswap/rotate.
- if (LegalOperations)
+ if (LegalOperations || OptLevel == CodeGenOpt::None)
return SDValue();
// We only handle merging simple stores of 1-4 bytes.
@@ -7672,9 +7732,12 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// | D |
// Into:
// (x & m) | (y & ~m)
-// If y is a constant, and the 'andn' does not work with immediates,
-// we unfold into a different pattern:
+// If y is a constant, m is not a 'not', and the 'andn' does not work with
+// immediates, we unfold into a different pattern:
// ~(~x & m) & (m | y)
+// If x is a constant, m is a 'not', and the 'andn' does not work with
+// immediates, we unfold into a different pattern:
+// (x | ~m) & ~(~m & ~y)
// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
// the very least that breaks andnpd / andnps patterns, and because those
// patterns are simplified in IR and shouldn't be created in the DAG
@@ -7729,8 +7792,9 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
SDLoc DL(N);
- // If Y is a constant, check that 'andn' works with immediates.
- if (!TLI.hasAndNot(Y)) {
+ // If Y is a constant, check that 'andn' works with immediates. Unless M is
+ // a bitwise not that would already allow ANDN to be used.
+ if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
// If not, we need to do a bit more work to make sure andn is still used.
SDValue NotX = DAG.getNOT(DL, X, VT);
@@ -7740,6 +7804,19 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
}
+ // If X is a constant and M is a bitwise not, check that 'andn' works with
+ // immediates.
+ if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
+ assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
+ // If not, we need to do a bit more work to make sure andn is still used.
+ SDValue NotM = M.getOperand(0);
+ SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
+ SDValue NotY = DAG.getNOT(DL, Y, VT);
+ SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
+ SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
+ return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
+ }
+
SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
SDValue NotM = DAG.getNOT(DL, M, VT);
SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
@@ -7751,10 +7828,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (xor x, 0) -> x, vector edition
@@ -7765,7 +7843,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
- SDLoc DL(N);
if (N0.isUndef() && N1.isUndef())
return DAG.getConstant(0, DL, VT);
@@ -7900,7 +7977,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// shift has been simplified to undef.
uint64_t ShiftAmt = ShiftC->getLimitedValue();
if (ShiftAmt < BitWidth) {
- APInt Ones = APInt::getAllOnesValue(BitWidth);
+ APInt Ones = APInt::getAllOnes(BitWidth);
Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
if (XorC->getAPIntValue() == Ones) {
// If the xor constant is a shifted -1, do a 'not' before the shift:
@@ -8223,7 +8300,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
@@ -8256,8 +8333,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return NewSel;
// if (shl x, c) is known to be zero, return 0
- if (DAG.MaskedValueIsZero(SDValue(N, 0),
- APInt::getAllOnesValue(OpSizeInBits)))
+ if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
return DAG.getConstant(0, SDLoc(N), VT);
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
@@ -8502,28 +8578,43 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
// Both operands must be equivalent extend nodes.
SDValue LeftOp = ShiftOperand.getOperand(0);
SDValue RightOp = ShiftOperand.getOperand(1);
+
bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
- if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
+ if (!IsSignExt && !IsZeroExt)
return SDValue();
- EVT WideVT1 = LeftOp.getValueType();
- EVT WideVT2 = RightOp.getValueType();
- (void)WideVT2;
+ EVT NarrowVT = LeftOp.getOperand(0).getValueType();
+ unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
+
+ SDValue MulhRightOp;
+ if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
+ unsigned ActiveBits = IsSignExt
+ ? Constant->getAPIntValue().getMinSignedBits()
+ : Constant->getAPIntValue().getActiveBits();
+ if (ActiveBits > NarrowVTSize)
+ return SDValue();
+ MulhRightOp = DAG.getConstant(
+ Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
+ NarrowVT);
+ } else {
+ if (LeftOp.getOpcode() != RightOp.getOpcode())
+ return SDValue();
+ // Check that the two extend nodes are the same type.
+ if (NarrowVT != RightOp.getOperand(0).getValueType())
+ return SDValue();
+ MulhRightOp = RightOp.getOperand(0);
+ }
+
+ EVT WideVT = LeftOp.getValueType();
// Proceed with the transformation if the wide types match.
- assert((WideVT1 == WideVT2) &&
+ assert((WideVT == RightOp.getValueType()) &&
"Cannot have a multiply node with two different operand types.");
- EVT NarrowVT = LeftOp.getOperand(0).getValueType();
- // Check that the two extend nodes are the same type.
- if (NarrowVT != RightOp.getOperand(0).getValueType())
- return SDValue();
-
// Proceed with the transformation if the wide type is twice as large
// as the narrow type.
- unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
- if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
+ if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
return SDValue();
// Check the shift amount with the narrow type size.
@@ -8541,10 +8632,10 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
return SDValue();
- SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
- RightOp.getOperand(0));
- return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
- : DAG.getZExtOrTrunc(Result, DL, WideVT1));
+ SDValue Result =
+ DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
+ return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT)
+ : DAG.getZExtOrTrunc(Result, DL, WideVT));
}
SDValue DAGCombiner::visitSRA(SDNode *N) {
@@ -8564,7 +8655,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
ConstantSDNode *N1C = isConstOrConstSplat(N1);
@@ -8762,7 +8853,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
ConstantSDNode *N1C = isConstOrConstSplat(N1);
@@ -8775,8 +8866,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return NewSel;
// if (srl x, c) is known to be zero, return 0
- if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
- APInt::getAllOnesValue(OpSizeInBits)))
+ if (N1C &&
+ DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
return DAG.getConstant(0, SDLoc(N), VT);
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
@@ -9358,27 +9449,27 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
// is also a target-independent combine here in DAGCombiner in the other
// direction for (select Cond, -1, 0) when the condition is not i1.
if (CondVT == MVT::i1 && !LegalOperations) {
- if (C1->isNullValue() && C2->isOne()) {
+ if (C1->isZero() && C2->isOne()) {
// select Cond, 0, 1 --> zext (!Cond)
SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
if (VT != MVT::i1)
NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
return NotCond;
}
- if (C1->isNullValue() && C2->isAllOnesValue()) {
+ if (C1->isZero() && C2->isAllOnes()) {
// select Cond, 0, -1 --> sext (!Cond)
SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
if (VT != MVT::i1)
NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
return NotCond;
}
- if (C1->isOne() && C2->isNullValue()) {
+ if (C1->isOne() && C2->isZero()) {
// select Cond, 1, 0 --> zext (Cond)
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
return Cond;
}
- if (C1->isAllOnesValue() && C2->isNullValue()) {
+ if (C1->isAllOnes() && C2->isZero()) {
// select Cond, -1, 0 --> sext (Cond)
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
@@ -9406,7 +9497,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
}
// select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
- if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
+ if (C1Val.isPowerOf2() && C2Val.isZero()) {
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
@@ -9434,7 +9525,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
TargetLowering::ZeroOrOneBooleanContent &&
TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
TargetLowering::ZeroOrOneBooleanContent &&
- C1->isNullValue() && C2->isOne()) {
+ C1->isZero() && C2->isOne()) {
SDValue NotCond =
DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
if (VT.bitsEq(CondVT))
@@ -9479,6 +9570,64 @@ static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
+ return SDValue();
+
+ SDValue Cond0 = N0.getOperand(0);
+ SDValue Cond1 = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ if (VT != Cond0.getValueType())
+ return SDValue();
+
+ // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
+ // compare is inverted from that pattern ("Cond0 s> -1").
+ if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
+ ; // This is the pattern we are looking for.
+ else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
+ std::swap(N1, N2);
+ else
+ return SDValue();
+
+ // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
+ if (isNullOrNullSplat(N2)) {
+ SDLoc DL(N);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
+ return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
+ }
+
+ // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
+ if (isAllOnesOrAllOnesSplat(N1)) {
+ SDLoc DL(N);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
+ return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
+ }
+
+ // If we have to invert the sign bit mask, only do that transform if the
+ // target has a bitwise 'and not' instruction (the invert is free).
+ // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
+ SDLoc DL(N);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
+ SDValue Not = DAG.getNOT(DL, Sra, VT);
+ return DAG.getNode(ISD::AND, DL, VT, Not, N2);
+ }
+
+ // TODO: There's another pattern in this family, but it may require
+ // implementing hasOrNot() to check for profitability:
+ // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -9703,8 +9852,8 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
"same value. This should have been addressed before this function.");
return DAG.getNode(
ISD::CONCAT_VECTORS, DL, VT,
- BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
- TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
+ BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
+ TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
}
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
@@ -10169,6 +10318,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (SDValue V = foldVSelectOfConstants(N))
return V;
+ if (hasOperation(ISD::SRA, VT))
+ if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
+ return V;
+
return SDValue();
}
@@ -10190,7 +10343,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
AddToWorklist(SCC.getNode());
if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
- if (!SCCC->isNullValue())
+ if (!SCCC->isZero())
return N2; // cond always true -> true val
else
return N3; // cond always false -> false val
@@ -10248,13 +10401,13 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
// Is 'X Cond C' always true or false?
auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
- bool False = (Cond == ISD::SETULT && C->isNullValue()) ||
+ bool False = (Cond == ISD::SETULT && C->isZero()) ||
(Cond == ISD::SETLT && C->isMinSignedValue()) ||
- (Cond == ISD::SETUGT && C->isAllOnesValue()) ||
+ (Cond == ISD::SETUGT && C->isAllOnes()) ||
(Cond == ISD::SETGT && C->isMaxSignedValue());
- bool True = (Cond == ISD::SETULE && C->isAllOnesValue()) ||
+ bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
(Cond == ISD::SETLE && C->isMaxSignedValue()) ||
- (Cond == ISD::SETUGE && C->isNullValue()) ||
+ (Cond == ISD::SETUGE && C->isZero()) ||
(Cond == ISD::SETGE && C->isMinSignedValue());
return True || False;
};
@@ -10863,7 +11016,7 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
return SDValue();
- if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
+ if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
return SDValue();
if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
@@ -11257,7 +11410,7 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
Known = DAG.computeKnownBits(Op);
- return (Known.Zero | 1).isAllOnesValue();
+ return (Known.Zero | 1).isAllOnes();
}
/// Given an extending node with a pop-count operand, if the target does not
@@ -12016,7 +12169,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
// If the input is already sign extended, just drop the extension.
- if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
+ if (ExtVTBits >= DAG.ComputeMinSignedBits(N0))
return N0;
// fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
@@ -12032,8 +12185,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
unsigned N00Bits = N00.getScalarValueSizeInBits();
- if ((N00Bits <= ExtVTBits ||
- (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
+ if ((N00Bits <= ExtVTBits || DAG.ComputeMinSignedBits(N00) <= ExtVTBits) &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
}
@@ -12052,8 +12204,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
if ((N00Bits == ExtVTBits ||
(!IsZext && (N00Bits < ExtVTBits ||
- (N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) <
- ExtVTBits))) &&
+ DAG.ComputeMinSignedBits(N00) <= ExtVTBits))) &&
(!LegalOperations ||
TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
@@ -12290,7 +12441,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue Amt = N0.getOperand(1);
KnownBits Known = DAG.computeKnownBits(Amt);
unsigned Size = VT.getScalarSizeInBits();
- if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
+ if (Known.countMaxActiveBits() <= Log2_32(Size)) {
SDLoc SL(N);
EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
@@ -12538,8 +12689,8 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
assert(N->getOpcode() == ISD::BUILD_PAIR);
- LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
- LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+ auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+ auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
// A BUILD_PAIR is always having the least significant part in elt 0 and the
// most significant part in elt 1. So when combining into one large load, we
@@ -12547,22 +12698,20 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
if (DAG.getDataLayout().isBigEndian())
std::swap(LD1, LD2);
- if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
+ !LD1->hasOneUse() || !LD2->hasOneUse() ||
LD1->getAddressSpace() != LD2->getAddressSpace())
return SDValue();
+
+ bool LD1Fast = false;
EVT LD1VT = LD1->getValueType(0);
unsigned LD1Bytes = LD1VT.getStoreSize();
- if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
- DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
- Align Alignment = LD1->getAlign();
- Align NewAlign = DAG.getDataLayout().getABITypeAlign(
- VT.getTypeForEVT(*DAG.getContext()));
-
- if (NewAlign <= Alignment &&
- (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
- return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
- LD1->getPointerInfo(), Alignment);
- }
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
+ DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
+ return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
+ LD1->getPointerInfo(), LD1->getAlign());
return SDValue();
}
@@ -12938,69 +13087,45 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
}
- SDLoc DL(BV);
-
// Okay, we know the src/dst types are both integers of differing types.
- // Handling growing first.
assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
- if (SrcBitSize < DstBitSize) {
- unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
-
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0, e = BV->getNumOperands(); i != e;
- i += NumInputsPerOutput) {
- bool isLE = DAG.getDataLayout().isLittleEndian();
- APInt NewBits = APInt(DstBitSize, 0);
- bool EltIsUndef = true;
- for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
- // Shift the previously computed bits over.
- NewBits <<= SrcBitSize;
- SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
- if (Op.isUndef()) continue;
- EltIsUndef = false;
- NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
- zextOrTrunc(SrcBitSize).zext(DstBitSize);
- }
-
- if (EltIsUndef)
- Ops.push_back(DAG.getUNDEF(DstEltVT));
- else
- Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
- }
+ // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
+ // BuildVectorSDNode?
+ auto *BVN = cast<BuildVectorSDNode>(BV);
- EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
- return DAG.getBuildVector(VT, DL, Ops);
- }
+ // Extract the constant raw bit data.
+ BitVector UndefElements;
+ SmallVector<APInt> RawBits;
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
+ if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
+ return SDValue();
- // Finally, this must be the case where we are shrinking elements: each input
- // turns into multiple outputs.
- unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
- EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
- NumOutputsPerInput*BV->getNumOperands());
+ SDLoc DL(BV);
SmallVector<SDValue, 8> Ops;
+ for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
+ if (UndefElements[I])
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ else
+ Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
+ }
- for (const SDValue &Op : BV->op_values()) {
- if (Op.isUndef()) {
- Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
- continue;
- }
-
- APInt OpVal = cast<ConstantSDNode>(Op)->
- getAPIntValue().zextOrTrunc(SrcBitSize);
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
+ return DAG.getBuildVector(VT, DL, Ops);
+}
- for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
- APInt ThisVal = OpVal.trunc(DstBitSize);
- Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
- OpVal.lshrInPlace(DstBitSize);
- }
+// Returns true if floating point contraction is allowed on the FMUL-SDValue
+// `N`
+static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
+ assert(N.getOpcode() == ISD::FMUL);
- // For big endian targets, swap the order of the pieces of each element.
- if (DAG.getDataLayout().isBigEndian())
- std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
- }
+ return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
+ N->getFlags().hasAllowContract();
+}
- return DAG.getBuildVector(VT, DL, Ops);
+// Returns true if `N` can assume no infinities involved in its computation.
+static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
+ return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs();
}
/// Try to perform FMA combining on a given FADD node.
@@ -13039,6 +13164,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+ auto isFusedOp = [&](SDValue N) {
+ unsigned Opcode = N.getOpcode();
+ return Opcode == ISD::FMA || Opcode == ISD::FMAD;
+ };
+
// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.
auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
@@ -13070,12 +13200,12 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
// This requires reassociation because it changes the order of operations.
SDValue FMA, E;
- if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&
+ if (CanReassociate && isFusedOp(N0) &&
N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
N0.getOperand(2).hasOneUse()) {
FMA = N0;
E = N1;
- } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&
+ } else if (CanReassociate && isFusedOp(N1) &&
N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
N1.getOperand(2).hasOneUse()) {
FMA = N1;
@@ -13131,7 +13261,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
Z));
};
- if (N0.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N0)) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
@@ -13161,7 +13291,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
};
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N00)) {
SDValue N002 = N00.getOperand(2);
if (isContractableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
@@ -13175,7 +13305,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fold (fadd x, (fma y, z, (fpext (fmul u, v)))
// -> (fma y, z, (fma (fpext u), (fpext v), x))
- if (N1.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N1)) {
SDValue N12 = N1.getOperand(2);
if (N12.getOpcode() == ISD::FP_EXTEND) {
SDValue N120 = N12.getOperand(0);
@@ -13196,7 +13326,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// interesting for all targets, especially GPUs.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N10)) {
SDValue N102 = N10.getOperand(2);
if (isContractableFMUL(N102) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
@@ -13392,12 +13522,17 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
return isContractableFMUL(N) && isReassociable(N.getNode());
};
+ auto isFusedOp = [&](SDValue N) {
+ unsigned Opcode = N.getOpcode();
+ return Opcode == ISD::FMA || Opcode == ISD::FMAD;
+ };
+
// More folding opportunities when target permits.
if (Aggressive && isReassociable(N)) {
bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))
- if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
+ if (CanFuse && isFusedOp(N0) &&
isContractableAndReassociableFMUL(N0.getOperand(2)) &&
N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
@@ -13410,7 +13545,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
- if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
+ if (CanFuse && isFusedOp(N1) &&
isContractableAndReassociableFMUL(N1.getOperand(2)) &&
N1->hasOneUse() && NoSignedZero) {
SDValue N20 = N1.getOperand(2).getOperand(0);
@@ -13424,8 +13559,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
- if (N0.getOpcode() == PreferredFusedOpcode &&
- N0->hasOneUse()) {
+ if (isFusedOp(N0) && N0->hasOneUse()) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
@@ -13451,7 +13585,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// interesting for all targets, especially GPUs.
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N00)) {
SDValue N002 = N00.getOperand(2);
if (isContractableAndReassociableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
@@ -13471,8 +13605,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fpext (fmul u, v))))
// -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
- if (N1.getOpcode() == PreferredFusedOpcode &&
- N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
+ if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
N1->hasOneUse()) {
SDValue N120 = N1.getOperand(2).getOperand(0);
if (isContractableAndReassociableFMUL(N120) &&
@@ -13496,8 +13629,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
- if (N1.getOpcode() == ISD::FP_EXTEND &&
- N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
+ if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {
SDValue CvtSrc = N1.getOperand(0);
SDValue N100 = CvtSrc.getOperand(0);
SDValue N101 = CvtSrc.getOperand(1);
@@ -13538,12 +13670,13 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
// The transforms below are incorrect when x == 0 and y == inf, because the
// intermediate multiplication produces a nan.
- if (!Options.NoInfsFPMath)
+ SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
+ if (!hasNoInfs(Options, FAdd))
return SDValue();
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
+ isContractableFMUL(Options, SDValue(N, 0)) &&
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
@@ -13633,7 +13766,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (fadd c1, c2) -> c1 + c2
@@ -13841,7 +13974,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (fsub c1, c2) -> c1-c2
@@ -13926,7 +14059,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
// This just handles C1 * C2 for vectors. Other vector folds are below.
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
}
@@ -13971,10 +14104,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (N1CFP && N1CFP->isExactlyValue(+2.0))
return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
- // fold (fmul X, -1.0) -> (fneg X)
- if (N1CFP && N1CFP->isExactlyValue(-1.0))
- if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, DL, VT, N0);
+ // fold (fmul X, -1.0) -> (fsub -0.0, X)
+ if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
+ return DAG.getNode(ISD::FSUB, DL, VT,
+ DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
+ }
+ }
// -N0 * -N1 --> N0 * N1
TargetLowering::NegatibleCost CostN0 =
@@ -14260,7 +14396,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (fdiv c1, c2) -> c1/c2
@@ -16245,11 +16381,12 @@ struct LoadedSlice {
return false;
// Check if it will be merged with the load.
- // 1. Check the alignment constraint.
- Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(
- ResVT.getTypeForEVT(*DAG->getContext()));
-
- if (RequiredAlignment > getAlign())
+ // 1. Check the alignment / fast memory access constraint.
+ bool IsFast = false;
+ if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
+ Origin->getAddressSpace(), getAlign(),
+ Origin->getMemOperand()->getFlags(), &IsFast) ||
+ !IsFast)
return false;
// 2. Check that the load is a legal operation for that type.
@@ -16270,7 +16407,7 @@ struct LoadedSlice {
/// \p UsedBits looks like 0..0 1..1 0..0.
static bool areUsedBitsDense(const APInt &UsedBits) {
// If all the bits are one, this is dense!
- if (UsedBits.isAllOnesValue())
+ if (UsedBits.isAllOnes())
return true;
// Get rid of the unused bits on the right.
@@ -16279,7 +16416,7 @@ static bool areUsedBitsDense(const APInt &UsedBits) {
if (NarrowedUsedBits.countLeadingZeros())
NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
// Check that the chunk of bits is completely used.
- return NarrowedUsedBits.isAllOnesValue();
+ return NarrowedUsedBits.isAllOnes();
}
/// Check whether or not \p First and \p Second are next to each other
@@ -16697,8 +16834,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
unsigned BitWidth = N1.getValueSizeInBits();
APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
if (Opc == ISD::AND)
- Imm ^= APInt::getAllOnesValue(BitWidth);
- if (Imm == 0 || Imm.isAllOnesValue())
+ Imm ^= APInt::getAllOnes(BitWidth);
+ if (Imm == 0 || Imm.isAllOnes())
return SDValue();
unsigned ShAmt = Imm.countTrailingZeros();
unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
@@ -16725,16 +16862,19 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if ((Imm & Mask) == Imm) {
APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
if (Opc == ISD::AND)
- NewImm ^= APInt::getAllOnesValue(NewBW);
+ NewImm ^= APInt::getAllOnes(NewBW);
uint64_t PtrOff = ShAmt / 8;
// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.
if (DAG.getDataLayout().isBigEndian())
PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
+ bool IsFast = false;
Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
- Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
- if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
+ LD->getAddressSpace(), NewAlign,
+ LD->getMemOperand()->getFlags(), &IsFast) ||
+ !IsFast)
return SDValue();
SDValue NewPtr =
@@ -16788,27 +16928,26 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
if (VTSize.isScalable())
return SDValue();
+ bool FastLD = false, FastST = false;
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
!TLI.isOperationLegal(ISD::STORE, IntVT) ||
!TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
- !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
- return SDValue();
-
- Align LDAlign = LD->getAlign();
- Align STAlign = ST->getAlign();
- Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
- Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);
- if (LDAlign < ABIAlign || STAlign < ABIAlign)
+ !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
+ *LD->getMemOperand(), &FastLD) ||
+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
+ *ST->getMemOperand(), &FastST) ||
+ !FastLD || !FastST)
return SDValue();
SDValue NewLD =
DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
- LD->getPointerInfo(), LDAlign);
+ LD->getPointerInfo(), LD->getAlign());
SDValue NewST =
DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
- ST->getPointerInfo(), STAlign);
+ ST->getPointerInfo(), ST->getAlign());
AddToWorklist(NewLD.getNode());
AddToWorklist(NewST.getNode());
@@ -16839,8 +16978,10 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
SDValue &ConstNode) {
APInt Val;
- // If the add only has one use, this would be OK to do.
- if (AddNode.getNode()->hasOneUse())
+ // If the add only has one use, and the target thinks the folding is
+ // profitable or does not lead to worse code, this would be OK to do.
+ if (AddNode.getNode()->hasOneUse() &&
+ TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
return true;
// Walk all the users of the constant with which we're multiplying.
@@ -16932,6 +17073,22 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
unsigned SizeInBits = NumStores * ElementSizeBits;
unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+ Optional<MachineMemOperand::Flags> Flags;
+ AAMDNodes AAInfo;
+ for (unsigned I = 0; I != NumStores; ++I) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
+ if (!Flags) {
+ Flags = St->getMemOperand()->getFlags();
+ AAInfo = St->getAAInfo();
+ continue;
+ }
+ // Skip merging if there's an inconsistent flag.
+ if (Flags != St->getMemOperand()->getFlags())
+ return false;
+ // Concatenate AA metadata.
+ AAInfo = AAInfo.concat(St->getAAInfo());
+ }
+
EVT StoreTy;
if (UseVector) {
unsigned Elts = NumStores * NumMemElts;
@@ -17049,9 +17206,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
// make sure we use trunc store if it's necessary to be legal.
SDValue NewStore;
if (!UseTrunc) {
- NewStore =
- DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), FirstInChain->getAlign());
+ NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ FirstInChain->getAlign(), Flags.getValue(), AAInfo);
} else { // Must be realized as a trunc store
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
@@ -17063,7 +17220,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
NewStore = DAG.getTruncStore(
NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
- FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
+ FirstInChain->getAlign(), Flags.getValue(), AAInfo);
}
// Replace all merged stores with the new store.
@@ -17360,7 +17517,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(
SDValue StoredVal = ST->getValue();
bool IsElementZero = false;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
- IsElementZero = C->isNullValue();
+ IsElementZero = C->isZero();
else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
IsElementZero = C->getConstantFPValue()->isNullValue();
if (IsElementZero) {
@@ -17379,7 +17536,8 @@ bool DAGCombiner::tryStoreMergeOfConstants(
break;
if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
+ DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast) {
@@ -17391,7 +17549,8 @@ bool DAGCombiner::tryStoreMergeOfConstants(
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
+ DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast) {
@@ -17410,7 +17569,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(
unsigned Elts = (i + 1) * NumMemElts;
EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
- TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, Ty,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast)
@@ -17486,7 +17645,8 @@ bool DAGCombiner::tryStoreMergeOfExtracts(
if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
break;
- if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ if (TLI.isTypeLegal(Ty) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, Ty,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast)
@@ -17634,8 +17794,13 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
bool IsFastSt = false;
bool IsFastLd = false;
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ // Don't try vector types if we need a rotate. We may still fail the
+ // legality checks for the integer type, but we can't handle the rotate
+ // case with vectors.
+ // FIXME: We could use a shuffle in place of the rotate.
+ if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
+ DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
@@ -17649,7 +17814,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
StoreTy = EVT::getIntegerVT(Context, SizeInBits);
if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
+ DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
@@ -17663,7 +17829,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
+ DAG.getMachineFunction()) &&
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
@@ -18215,7 +18382,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
case ISD::LIFETIME_END:
// We can forward past any lifetime start/end that can be proven not to
// alias the node.
- if (!isAlias(Chain.getNode(), N))
+ if (!mayAlias(Chain.getNode(), N))
Chains.push_back(Chain.getOperand(0));
break;
case ISD::STORE: {
@@ -18593,32 +18760,35 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
if (!VecEltVT.isByteSized())
return SDValue();
- Align Alignment = OriginalLoad->getAlign();
- Align NewAlign = DAG.getDataLayout().getABITypeAlign(
- VecEltVT.getTypeForEVT(*DAG.getContext()));
-
- if (NewAlign > Alignment ||
- !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
- return SDValue();
-
- ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
- ISD::NON_EXTLOAD : ISD::EXTLOAD;
- if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
+ ISD::LoadExtType ExtTy =
+ ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
+ if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
+ !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
return SDValue();
- Alignment = NewAlign;
-
+ Align Alignment = OriginalLoad->getAlign();
MachinePointerInfo MPI;
SDLoc DL(EVE);
if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
int Elt = ConstEltNo->getZExtValue();
unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
+ Alignment = commonAlignment(Alignment, PtrOff);
} else {
// Discard the pointer info except the address space because the memory
// operand can't represent this new access since the offset is variable.
MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
+ Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
}
+
+ bool IsFast = false;
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
+ OriginalLoad->getAddressSpace(), Alignment,
+ OriginalLoad->getMemOperand()->getFlags(),
+ &IsFast) ||
+ !IsFast)
+ return SDValue();
+
SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
InVecVT, EltNo);
@@ -18864,7 +19034,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
Use->getOperand(0) == VecOp &&
isa<ConstantSDNode>(Use->getOperand(1));
})) {
- APInt DemandedElts = APInt::getNullValue(NumElts);
+ APInt DemandedElts = APInt::getZero(NumElts);
for (SDNode *Use : VecOp->uses()) {
auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
if (CstElt->getAPIntValue().ult(NumElts))
@@ -18877,7 +19047,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
AddToWorklist(N);
return SDValue(N, 0);
}
- APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
+ APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
// We simplified the vector operand of this extract element. If this
// extract is not dead, visit it again so it is folded properly.
@@ -19672,8 +19842,10 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
// Make sure the first element matches
// (zext (extract_vector_elt X, C))
+ // Offset must be a constant multiple of the
+ // known-minimum vector length of the result type.
int64_t Offset = checkElem(Op0);
- if (Offset < 0)
+ if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
return SDValue();
unsigned NumElems = N->getNumOperands();
@@ -19844,6 +20016,44 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
}
+// Attempt to merge nested concat_vectors/undefs.
+// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
+// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
+static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
+ SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+
+ // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
+ EVT SubVT;
+ SDValue FirstConcat;
+ for (const SDValue &Op : N->ops()) {
+ if (Op.isUndef())
+ continue;
+ if (Op.getOpcode() != ISD::CONCAT_VECTORS)
+ return SDValue();
+ if (!FirstConcat) {
+ SubVT = Op.getOperand(0).getValueType();
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
+ return SDValue();
+ FirstConcat = Op;
+ continue;
+ }
+ if (SubVT != Op.getOperand(0).getValueType())
+ return SDValue();
+ }
+ assert(FirstConcat && "Concat of all-undefs found");
+
+ SmallVector<SDValue> ConcatOps;
+ for (const SDValue &Op : N->ops()) {
+ if (Op.isUndef()) {
+ ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
+ continue;
+ }
+ ConcatOps.append(Op->op_begin(), Op->op_end());
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
+}
+
// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
@@ -20103,13 +20313,19 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
}
// Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
+ // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
if (SDValue V = combineConcatVectorOfScalars(N, DAG))
return V;
- // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
- if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
+ // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
+ if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
+ return V;
+
+ // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
return V;
+ }
if (SDValue V = combineConcatVectorOfCasts(N, DAG))
return V;
@@ -20351,9 +20567,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
return SDValue();
auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
- auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
- if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
- !ExtIdx)
+ if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
return SDValue();
// Allow targets to opt-out.
@@ -20363,7 +20577,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
if (!VT.isByteSized())
return SDValue();
- unsigned Index = ExtIdx->getZExtValue();
+ unsigned Index = Extract->getConstantOperandVal(1);
unsigned NumElts = VT.getVectorMinNumElements();
// The definition of EXTRACT_SUBVECTOR states that the index must be a
@@ -20492,7 +20706,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// If the concatenated source types match this extract, it's a direct
// simplification:
// extract_subvec (concat V1, V2, ...), i --> Vi
- if (ConcatSrcNumElts == ExtNumElts)
+ if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
return V.getOperand(ConcatOpIdx);
// If the concatenated source vectors are a multiple length of this extract,
@@ -20500,7 +20714,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// concat operand. Example:
// v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->
// v2i8 extract_subvec v8i8 Y, 6
- if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
+ if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
+ ConcatSrcNumElts % ExtNumElts == 0) {
SDLoc DL(N);
unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
@@ -20562,8 +20777,12 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// otherwise => (extract_subvec V1, ExtIdx)
uint64_t InsIdx = V.getConstantOperandVal(2);
if (InsIdx * SmallVT.getScalarSizeInBits() ==
- ExtIdx * NVT.getScalarSizeInBits())
+ ExtIdx * NVT.getScalarSizeInBits()) {
+ if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
+ return SDValue();
+
return DAG.getBitcast(NVT, V.getOperand(1));
+ }
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
@@ -21131,15 +21350,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
// Canonicalize shuffle v, v -> v, undef
- if (N0 == N1) {
- SmallVector<int, 8> NewMask;
- for (unsigned i = 0; i != NumElts; ++i) {
- int Idx = SVN->getMaskElt(i);
- if (Idx >= (int)NumElts) Idx -= NumElts;
- NewMask.push_back(Idx);
- }
- return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
- }
+ if (N0 == N1)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
+ createUnaryMask(SVN->getMask(), NumElts));
// Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
if (N0.isUndef())
@@ -21290,6 +21503,70 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
}
+ // See if we can replace a shuffle with an insert_subvector.
+ // e.g. v2i32 into v8i32:
+ // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
+ // --> insert_subvector(lhs,rhs1,4).
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
+ TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
+ auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
+ // Ensure RHS subvectors are legal.
+ assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
+ EVT SubVT = RHS.getOperand(0).getValueType();
+ int NumSubVecs = RHS.getNumOperands();
+ int NumSubElts = SubVT.getVectorNumElements();
+ assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
+ if (!TLI.isTypeLegal(SubVT))
+ return SDValue();
+
+ // Don't bother if we have an unary shuffle (matches undef + LHS elts).
+ if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
+ return SDValue();
+
+ // Search [NumSubElts] spans for RHS sequence.
+ // TODO: Can we avoid nested loops to increase performance?
+ SmallVector<int> InsertionMask(NumElts);
+ for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
+ for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
+ // Reset mask to identity.
+ std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
+
+ // Add subvector insertion.
+ std::iota(InsertionMask.begin() + SubIdx,
+ InsertionMask.begin() + SubIdx + NumSubElts,
+ NumElts + (SubVec * NumSubElts));
+
+ // See if the shuffle mask matches the reference insertion mask.
+ bool MatchingShuffle = true;
+ for (int i = 0; i != (int)NumElts; ++i) {
+ int ExpectIdx = InsertionMask[i];
+ int ActualIdx = Mask[i];
+ if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
+ MatchingShuffle = false;
+ break;
+ }
+ }
+
+ if (MatchingShuffle)
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
+ RHS.getOperand(SubVec),
+ DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
+ }
+ }
+ return SDValue();
+ };
+ ArrayRef<int> Mask = SVN->getMask();
+ if (N1.getOpcode() == ISD::CONCAT_VECTORS)
+ if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
+ return InsertN1;
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
+ SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
+ ShuffleVectorSDNode::commuteMask(CommuteMask);
+ if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
+ return InsertN0;
+ }
+ }
+
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
@@ -21859,6 +22136,40 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVPOp(SDNode *N) {
+ // VP operations in which all vector elements are disabled - either by
+ // determining that the mask is all false or that the EVL is 0 - can be
+ // eliminated.
+ bool AreAllEltsDisabled = false;
+ if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
+ AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
+ if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
+ AreAllEltsDisabled |=
+ ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
+
+ // This is the only generic VP combine we support for now.
+ if (!AreAllEltsDisabled)
+ return SDValue();
+
+ // Binary operations can be replaced by UNDEF.
+ if (ISD::isVPBinaryOp(N->getOpcode()))
+ return DAG.getUNDEF(N->getValueType(0));
+
+ // VP Memory operations can be replaced by either the chain (stores) or the
+ // chain + undef (loads).
+ if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
+ if (MemSD->writeMem())
+ return MemSD->getChain();
+ return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
+ }
+
+ // Reduction operations return the start operand when no elements are active.
+ if (ISD::isVPReduction(N->getOpcode()))
+ return N->getOperand(0);
+
+ return SDValue();
+}
+
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
@@ -21915,7 +22226,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
else
Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
- if (Bits.isAllOnesValue())
+ if (Bits.isAllOnes())
Indices.push_back(i);
else if (Bits == 0)
Indices.push_back(i + NumSubElts);
@@ -21950,7 +22261,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
/// If a vector binop is performed on splat values, it may be profitable to
/// extract, scalarize, and insert/splat.
-static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
+static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
+ const SDLoc &DL) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
unsigned Opcode = N->getOpcode();
@@ -21971,7 +22283,6 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
!TLI.isOperationLegalOrCustom(Opcode, EltVT))
return SDValue();
- SDLoc DL(N);
SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
@@ -21995,20 +22306,19 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
}
/// Visit a binary vector operation, like ADD.
-SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
- assert(N->getValueType(0).isVector() &&
- "SimplifyVBinOp only works on vectors!");
+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
+ EVT VT = N->getValueType(0);
+ assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Ops[] = {LHS, RHS};
- EVT VT = N->getValueType(0);
unsigned Opcode = N->getOpcode();
SDNodeFlags Flags = N->getFlags();
// See if we can constant fold the vector operation.
- if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
- Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
+ if (SDValue Fold = DAG.FoldConstantArithmetic(Opcode, SDLoc(LHS),
+ LHS.getValueType(), Ops))
return Fold;
// Move unary shuffles with identical masks after a vector binop:
@@ -22026,7 +22336,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
(LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
- SDLoc DL(N);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
RHS.getOperand(0), Flags);
SDValue UndefV = LHS.getOperand(1);
@@ -22043,7 +22352,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
// binop (splat X), (splat C) --> splat (binop X, C)
- SDLoc DL(N);
SDValue X = Shuf0->getOperand(0);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
@@ -22053,7 +22361,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
// binop (splat C), (splat X) --> splat (binop C, X)
- SDLoc DL(N);
SDValue X = Shuf1->getOperand(0);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
@@ -22077,7 +22384,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
LegalOperations)) {
// (binop undef, undef) may not return undef, so compute that result.
- SDLoc DL(N);
SDValue VecC =
DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
@@ -22104,7 +22410,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
EVT NarrowVT = LHS.getOperand(0).getValueType();
if (NarrowVT == RHS.getOperand(0).getValueType() &&
TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
- SDLoc DL(N);
unsigned NumOperands = LHS.getNumOperands();
SmallVector<SDValue, 4> ConcatOps;
for (unsigned i = 0; i != NumOperands; ++i) {
@@ -22117,7 +22422,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
}
}
- if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
+ if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
return V;
return SDValue();
@@ -22431,15 +22736,23 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
return SDValue();
- if (!N->isOnlyUserOf(N0.getNode()) || !N->isOnlyUserOf(N1.getNode()))
+ // The use checks are intentionally on SDNode because we may be dealing
+ // with opcodes that produce more than one SDValue.
+ // TODO: Do we really need to check N0 (the condition operand of the select)?
+ // But removing that clause could cause an infinite loop...
+ if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
return SDValue();
+ // Binops may include opcodes that return multiple values, so all values
+ // must be created/propagated from the newly created binops below.
+ SDVTList OpVTs = N1->getVTList();
+
// Fold select(cond, binop(x, y), binop(z, y))
// --> binop(select(cond, x, z), y)
if (N1.getOperand(1) == N2.getOperand(1)) {
SDValue NewSel =
DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
- SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, NewSel, N1.getOperand(1));
+ SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
NewBinOp->setFlags(N1->getFlags());
NewBinOp->intersectFlagsWith(N2->getFlags());
return NewBinOp;
@@ -22453,7 +22766,7 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
VT == N2.getOperand(1).getValueType()) {
SDValue NewSel =
DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
- SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, N1.getOperand(0), NewSel);
+ SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
NewBinOp->setFlags(N1->getFlags());
NewBinOp->intersectFlagsWith(N2->getFlags());
return NewBinOp;
@@ -22581,7 +22894,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
// fold select_cc true, x, y -> x
// fold select_cc false, x, y -> y
- return !(SCCC->isNullValue()) ? N2 : N3;
+ return !(SCCC->isZero()) ? N2 : N3;
}
}
@@ -22680,7 +22993,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
// select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
// select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
// select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
- if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
SDValue ValueOnZero = N2;
SDValue Count = N3;
// If the condition is NE instead of E, swap the operands.
@@ -22707,6 +23020,20 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
}
}
+ // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
+ // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
+ if (!NotExtCompare && N1C && N2C && N3C &&
+ N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
+ ((N1C->isAllOnes() && CC == ISD::SETGT) ||
+ (N1C->isZero() && CC == ISD::SETLT)) &&
+ !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
+ SDValue ASR = DAG.getNode(
+ ISD::SRA, DL, CmpOpVT, N0,
+ DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
+ return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
+ DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
+ }
+
return SDValue();
}
@@ -22747,7 +23074,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
return SDValue();
// Avoid division by zero.
- if (C->isNullValue())
+ if (C->isZero())
return SDValue();
SmallVector<SDNode *, 8> Built;
@@ -22792,7 +23119,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal, we need to find the zero of the function:
-/// F(X) = A X - 1 [which has a zero at X = 1/A]
+/// F(X) = 1/X - A [which has a zero at X = 1/A]
/// =>
/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
/// does not require additional intermediate precision]
@@ -22803,9 +23130,10 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
if (LegalDAG)
return SDValue();
- // TODO: Handle half and/or extended types?
+ // TODO: Handle extended types?
EVT VT = Op.getValueType();
- if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
+ VT.getScalarType() != MVT::f64)
return SDValue();
// If estimates are explicitly disabled for this function, we're done.
@@ -22942,9 +23270,10 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
if (LegalDAG)
return SDValue();
- // TODO: Handle half and/or extended types?
+ // TODO: Handle extended types?
EVT VT = Op.getValueType();
- if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
+ VT.getScalarType() != MVT::f64)
return SDValue();
// If estimates are explicitly disabled for this function, we're done.
@@ -22994,7 +23323,7 @@ SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
}
/// Return true if there is any possibility that the two addresses overlap.
-bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
+bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
struct MemUseCharacteristics {
bool IsVolatile;
@@ -23154,7 +23483,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
// TODO: Relax aliasing for unordered atomics (see D66309)
bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
cast<LSBaseSDNode>(C.getNode())->isSimple();
- if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
+ if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
// Look further up the chain.
C = C.getOperand(0);
return true;
@@ -23172,7 +23501,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
case ISD::LIFETIME_END: {
// We can forward past any lifetime start/end that can be proven not to
// alias the memory access.
- if (!isAlias(N, C.getNode())) {
+ if (!mayAlias(N, C.getNode())) {
// Look further up the chain.
C = C.getOperand(0);
return true;
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 4ca731cfdf62..4d1449bc2751 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -75,6 +75,7 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
@@ -195,10 +196,8 @@ void FastISel::flushLocalValueMap() {
EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt)
: FuncInfo.MBB->rend();
MachineBasicBlock::reverse_iterator RI(LastLocalValue);
- for (; RI != RE;) {
- MachineInstr &LocalMI = *RI;
- // Increment before erasing what it points to.
- ++RI;
+ for (MachineInstr &LocalMI :
+ llvm::make_early_inc_range(llvm::make_range(RI, RE))) {
Register DefReg = findLocalRegDef(LocalMI);
if (!DefReg)
continue;
@@ -622,7 +621,7 @@ bool FastISel::selectGetElementPtr(const User *I) {
bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
const CallInst *CI, unsigned StartIdx) {
- for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) {
+ for (unsigned i = StartIdx, e = CI->arg_size(); i != e; ++i) {
Value *Val = CI->getArgOperand(i);
// Check for constants and encode them with a StackMaps::ConstantOp prefix.
if (const auto *C = dyn_cast<ConstantInt>(Val)) {
@@ -784,7 +783,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
// Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
// This includes all meta-operands up to but not including CC.
unsigned NumMetaOpers = PatchPointOpers::CCPos;
- assert(I->getNumArgOperands() >= NumMetaOpers + NumArgs &&
+ assert(I->arg_size() >= NumMetaOpers + NumArgs &&
"Not enough arguments provided to the patchpoint intrinsic");
// For AnyRegCC the arguments are lowered later on manually.
@@ -1151,6 +1150,8 @@ bool FastISel::lowerCall(const CallInst *CI) {
CLI.setCallee(RetTy, FuncTy, CI->getCalledOperand(), std::move(Args), *CI)
.setTailCall(IsTailCall);
+ diagnoseDontCall(*CI);
+
return lowerCallTo(CLI);
}
@@ -1264,7 +1265,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
// If using instruction referencing, mutate this into a DBG_INSTR_REF,
// to be later patched up by finalizeDebugInstrRefs. Tack a deref onto
// the expression, we don't have an "indirect" flag in DBG_INSTR_REF.
- if (TM.Options.ValueTrackingVariableLocations && Op->isReg()) {
+ if (FuncInfo.MF->useDebugInstrRef() && Op->isReg()) {
Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF));
Builder->getOperand(1).ChangeToImmediate(0);
auto *NewExpr =
@@ -1292,18 +1293,22 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U,
DI->getVariable(), DI->getExpression());
} else if (const auto *CI = dyn_cast<ConstantInt>(V)) {
+ // See if there's an expression to constant-fold.
+ DIExpression *Expr = DI->getExpression();
+ if (Expr)
+ std::tie(Expr, CI) = Expr->constantFold(CI);
if (CI->getBitWidth() > 64)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addCImm(CI)
.addImm(0U)
.addMetadata(DI->getVariable())
- .addMetadata(DI->getExpression());
+ .addMetadata(Expr);
else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addImm(CI->getZExtValue())
.addImm(0U)
.addMetadata(DI->getVariable())
- .addMetadata(DI->getExpression());
+ .addMetadata(Expr);
} else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addFPImm(CF)
@@ -1319,7 +1324,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
// If using instruction referencing, mutate this into a DBG_INSTR_REF,
// to be later patched up by finalizeDebugInstrRefs.
- if (TM.Options.ValueTrackingVariableLocations) {
+ if (FuncInfo.MF->useDebugInstrRef()) {
Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF));
Builder->getOperand(1).ChangeToImmediate(0);
}
@@ -2303,8 +2308,7 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {
bool IsDereferenceable = I->hasMetadata(LLVMContext::MD_dereferenceable);
const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);
- AAMDNodes AAInfo;
- I->getAAMetadata(AAInfo);
+ AAMDNodes AAInfo = I->getAAMetadata();
if (!Alignment) // Ensure that codegen never sees alignment 0.
Alignment = DL.getABITypeAlign(ValTy);
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 348fad6daf8f..c1bb65409282 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -722,7 +722,7 @@ void InstrEmitter::AddDbgValueLocationOps(
MIB.addFrameIndex(Op.getFrameIx());
break;
case SDDbgOperand::VREG:
- MIB.addReg(Op.getVReg(), RegState::Debug);
+ MIB.addReg(Op.getVReg());
break;
case SDDbgOperand::SDNODE: {
SDValue V = SDValue(Op.getSDNode(), Op.getResNo());
@@ -862,7 +862,7 @@ MachineInstr *InstrEmitter::EmitDbgNoLocation(SDDbgValue *SD) {
DebugLoc DL = SD->getDebugLoc();
auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE));
MIB.addReg(0U);
- MIB.addReg(0U, RegState::Debug);
+ MIB.addReg(0U);
MIB.addMetadata(Var);
MIB.addMetadata(Expr);
return &*MIB;
@@ -872,22 +872,33 @@ MachineInstr *
InstrEmitter::EmitDbgValueFromSingleOp(SDDbgValue *SD,
DenseMap<SDValue, Register> &VRBaseMap) {
MDNode *Var = SD->getVariable();
- MDNode *Expr = SD->getExpression();
+ DIExpression *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
assert(SD->getLocationOps().size() == 1 &&
"Non variadic dbg_value should have only one location op");
+ // See about constant-folding the expression.
+ // Copy the location operand in case we replace it.
+ SmallVector<SDDbgOperand, 1> LocationOps(1, SD->getLocationOps()[0]);
+ if (Expr && LocationOps[0].getKind() == SDDbgOperand::CONST) {
+ const Value *V = LocationOps[0].getConst();
+ if (auto *C = dyn_cast<ConstantInt>(V)) {
+ std::tie(Expr, C) = Expr->constantFold(C);
+ LocationOps[0] = SDDbgOperand::fromConst(C);
+ }
+ }
+
// Emit non-variadic dbg_value nodes as DBG_VALUE.
// DBG_VALUE := "DBG_VALUE" loc, isIndirect, var, expr
auto MIB = BuildMI(*MF, DL, II);
- AddDbgValueLocationOps(MIB, II, SD->getLocationOps(), VRBaseMap);
+ AddDbgValueLocationOps(MIB, II, LocationOps, VRBaseMap);
if (SD->isIndirect())
MIB.addImm(0U);
else
- MIB.addReg(0U, RegState::Debug);
+ MIB.addReg(0U);
return MIB.addMetadata(Var).addMetadata(Expr);
}
@@ -1329,5 +1340,5 @@ InstrEmitter::InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb,
TRI(MF->getSubtarget().getRegisterInfo()),
TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb),
InsertPos(insertpos) {
- EmitDebugInstrRefs = TM.Options.ValueTrackingVariableLocations;
+ EmitDebugInstrRefs = MF->useDebugInstrRef();
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index d92b23f56e4d..eb9d2286aeb4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1164,6 +1164,16 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(),
cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
break;
+ case ISD::VP_SCATTER:
+ Action = TLI.getOperationAction(
+ Node->getOpcode(),
+ cast<VPScatterSDNode>(Node)->getValue().getValueType());
+ break;
+ case ISD::VP_STORE:
+ Action = TLI.getOperationAction(
+ Node->getOpcode(),
+ cast<VPStoreSDNode>(Node)->getValue().getValueType());
+ break;
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
@@ -1181,6 +1191,22 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Node->getOpcode(), Node->getOperand(0).getValueType());
break;
case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ case ISD::VP_REDUCE_FADD:
+ case ISD::VP_REDUCE_FMUL:
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ case ISD::VP_REDUCE_FMAX:
+ case ISD::VP_REDUCE_FMIN:
+ case ISD::VP_REDUCE_SEQ_FADD:
+ case ISD::VP_REDUCE_SEQ_FMUL:
Action = TLI.getOperationAction(
Node->getOpcode(), Node->getOperand(1).getValueType());
break;
@@ -1333,9 +1359,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
Visited.insert(Op.getNode());
Worklist.push_back(Idx.getNode());
SDValue StackPtr, Ch;
- for (SDNode::use_iterator UI = Vec.getNode()->use_begin(),
- UE = Vec.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (SDNode *User : Vec.getNode()->uses()) {
if (StoreSDNode *ST = dyn_cast<StoreSDNode>(User)) {
if (ST->isIndexed() || ST->isTruncatingStore() ||
ST->getValue() != Vec)
@@ -2197,9 +2221,7 @@ static bool useSinCos(SDNode *Node) {
? ISD::FCOS : ISD::FSIN;
SDValue Op0 = Node->getOperand(0);
- for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
- UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (const SDNode *User : Op0.getNode()->uses()) {
if (User == Node)
continue;
// The other user might have been turned into sincos already.
@@ -2636,7 +2658,7 @@ SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) {
// If CTPOP is legal, use it. Otherwise use shifts and xor.
SDValue Result;
- if (TLI.isOperationLegal(ISD::CTPOP, VT)) {
+ if (TLI.isOperationLegalOrPromote(ISD::CTPOP, VT)) {
Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
} else {
Result = Op;
@@ -2658,21 +2680,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
bool NeedInvert;
switch (Node->getOpcode()) {
case ISD::ABS:
- if (TLI.expandABS(Node, Tmp1, DAG))
+ if ((Tmp1 = TLI.expandABS(Node, DAG)))
Results.push_back(Tmp1);
break;
case ISD::CTPOP:
- if (TLI.expandCTPOP(Node, Tmp1, DAG))
+ if ((Tmp1 = TLI.expandCTPOP(Node, DAG)))
Results.push_back(Tmp1);
break;
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
- if (TLI.expandCTLZ(Node, Tmp1, DAG))
+ if ((Tmp1 = TLI.expandCTLZ(Node, DAG)))
Results.push_back(Tmp1);
break;
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- if (TLI.expandCTTZ(Node, Tmp1, DAG))
+ if ((Tmp1 = TLI.expandCTTZ(Node, DAG)))
Results.push_back(Tmp1);
break;
case ISD::BITREVERSE:
@@ -3229,9 +3251,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
"Don't know how to expand this subtraction!");
- Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
- DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl,
- VT));
+ Tmp1 = DAG.getNOT(dl, Node->getOperand(1), VT);
Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT));
Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
break;
@@ -4242,8 +4262,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
SDValue Op = Node->getOperand(IsStrict ? 1 : 0);
SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
EVT VT = Node->getValueType(0);
- assert(cast<ConstantSDNode>(Node->getOperand(IsStrict ? 2 : 1))
- ->isNullValue() &&
+ assert(cast<ConstantSDNode>(Node->getOperand(IsStrict ? 2 : 1))->isZero() &&
"Unable to expand as libcall if it is not normal rounding");
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), VT);
@@ -4737,6 +4756,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
break;
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FROUND:
case ISD::STRICT_FSIN:
case ISD::STRICT_FCOS:
case ISD::STRICT_FLOG:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 3553f9ec16c2..27f9cede1922 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -61,6 +61,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to soften the result of this operator!");
+ case ISD::ARITH_FENCE: R = SoftenFloatRes_ARITH_FENCE(N); break;
case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
@@ -206,6 +207,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREEZE(SDNode *N) {
GetSoftenedFloat(N->getOperand(0)));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_ARITH_FENCE(SDNode *N) {
+ EVT Ty = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue NewFence = DAG.getNode(ISD::ARITH_FENCE, SDLoc(N), Ty,
+ GetSoftenedFloat(N->getOperand(0)));
+ return NewFence;
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N,
unsigned ResNo) {
SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
@@ -257,7 +265,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
unsigned Size = NVT.getSizeInBits();
// Mask = ~(1 << (Size-1))
- APInt API = APInt::getAllOnesValue(Size);
+ APInt API = APInt::getAllOnes(Size);
API.clearBit(Size - 1);
SDValue Mask = DAG.getConstant(API, SDLoc(N), NVT);
SDValue Op = GetSoftenedFloat(N->getOperand(0));
@@ -820,6 +828,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
+ case ISD::STRICT_FP_TO_FP16:
case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
@@ -871,13 +880,17 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
// We actually deal with the partially-softened FP_TO_FP16 node too, which
// returns an i16 so doesn't meet the constraints necessary for FP_ROUND.
assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 ||
+ N->getOpcode() == ISD::STRICT_FP_TO_FP16 ||
N->getOpcode() == ISD::STRICT_FP_ROUND);
bool IsStrict = N->isStrictFPOpcode();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
EVT SVT = Op.getValueType();
EVT RVT = N->getValueType(0);
- EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT;
+ EVT FloatRVT = (N->getOpcode() == ISD::FP_TO_FP16 ||
+ N->getOpcode() == ISD::STRICT_FP_TO_FP16)
+ ? MVT::f16
+ : RVT;
RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index b8a3dd014901..1fa4d88fcb4a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -23,6 +23,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
using namespace llvm;
#define DEBUG_TYPE "legalize-types"
@@ -81,15 +82,23 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FSETCCS:
case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
case ISD::SMIN:
- case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break;
+ case ISD::SMAX:
+ Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ false);
+ break;
case ISD::UMIN:
case ISD::UMAX: Res = PromoteIntRes_UMINUMAX(N); break;
- case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
+ case ISD::SHL:
+ Res = PromoteIntRes_SHL(N, /*IsVP*/ false);
+ break;
case ISD::SIGN_EXTEND_INREG:
Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
- case ISD::SRA: Res = PromoteIntRes_SRA(N); break;
- case ISD::SRL: Res = PromoteIntRes_SRL(N); break;
+ case ISD::SRA:
+ Res = PromoteIntRes_SRA(N, /*IsVP*/ false);
+ break;
+ case ISD::SRL:
+ Res = PromoteIntRes_SRL(N, /*IsVP*/ false);
+ break;
case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
@@ -144,13 +153,19 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::XOR:
case ISD::ADD:
case ISD::SUB:
- case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
+ case ISD::MUL:
+ Res = PromoteIntRes_SimpleIntBinOp(N, /*IsVP*/ false);
+ break;
case ISD::SDIV:
- case ISD::SREM: Res = PromoteIntRes_SExtIntBinOp(N); break;
+ case ISD::SREM:
+ Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ false);
+ break;
case ISD::UDIV:
- case ISD::UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break;
+ case ISD::UREM:
+ Res = PromoteIntRes_ZExtIntBinOp(N, /*IsVP*/ false);
+ break;
case ISD::SADDO:
case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
@@ -220,6 +235,18 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
Res = PromoteIntRes_VECREDUCE(N);
break;
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ Res = PromoteIntRes_VP_REDUCE(N);
+ break;
+
case ISD::FREEZE:
Res = PromoteIntRes_FREEZE(N);
break;
@@ -233,6 +260,32 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FSHR:
Res = PromoteIntRes_FunnelShift(N);
break;
+
+ case ISD::VP_AND:
+ case ISD::VP_OR:
+ case ISD::VP_XOR:
+ case ISD::VP_ADD:
+ case ISD::VP_SUB:
+ case ISD::VP_MUL:
+ Res = PromoteIntRes_SimpleIntBinOp(N, /*IsVP*/ true);
+ break;
+ case ISD::VP_SDIV:
+ case ISD::VP_SREM:
+ Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ true);
+ break;
+ case ISD::VP_UDIV:
+ case ISD::VP_UREM:
+ Res = PromoteIntRes_ZExtIntBinOp(N, /*IsVP*/ true);
+ break;
+ case ISD::VP_SHL:
+ Res = PromoteIntRes_SHL(N, /*IsVP*/ true);
+ break;
+ case ISD::VP_ASHR:
+ Res = PromoteIntRes_SRA(N, /*IsVP*/ true);
+ break;
+ case ISD::VP_LSHR:
+ Res = PromoteIntRes_SRL(N, /*IsVP*/ true);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -438,19 +491,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
CreateStackStoreLoad(InOp, OutVT));
}
-// Helper for BSWAP/BITREVERSE promotion to ensure we can fit any shift amount
-// in the VT returned by getShiftAmountTy and to return a safe VT if we can't.
-static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI,
- SelectionDAG &DAG) {
- EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- // If any possible shift value won't fit in the prefered type, just use
- // something safe. It will be legalized when the shift is expanded.
- if (!ShiftVT.isVector() &&
- ShiftVT.getSizeInBits() < Log2_32_Ceil(VT.getSizeInBits()))
- ShiftVT = MVT::i32;
- return ShiftVT;
-}
-
SDValue DAGTypeLegalizer::PromoteIntRes_FREEZE(SDNode *N) {
SDValue V = GetPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::FREEZE, SDLoc(N),
@@ -474,7 +514,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
+ EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
DAG.getConstant(DiffBits, dl, ShiftVT));
}
@@ -496,7 +536,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
+ EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
return DAG.getNode(ISD::SRL, dl, NVT,
DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
DAG.getConstant(DiffBits, dl, ShiftVT));
@@ -526,11 +566,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+ SDLoc dl(N);
+
+ // If the larger CTLZ isn't supported by the target, try to expand now.
+ // If we expand later we'll end up with more operations since we lost the
+ // original type.
+ if (!OVT.isVector() && TLI.isTypeLegal(NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTLZ, NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTLZ_ZERO_UNDEF, NVT)) {
+ if (SDValue Result = TLI.expandCTLZ(N, DAG)) {
+ Result = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Result);
+ return Result;
+ }
+ }
+
// Zero extend to the promoted type and do the count there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
- SDLoc dl(N);
- EVT OVT = N->getValueType(0);
- EVT NVT = Op.getValueType();
Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
// Subtract off the extra leading bits in the bigger type.
return DAG.getNode(
@@ -540,6 +593,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+
+ // If the larger CTPOP isn't supported by the target, try to expand now.
+ // If we expand later we'll end up with more operations since we lost the
+ // original type.
+ // TODO: Expand ISD::PARITY. Need to move ExpandPARITY from LegalizeDAG to
+ // TargetLowering.
+ if (N->getOpcode() == ISD::CTPOP && !OVT.isVector() && TLI.isTypeLegal(NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTPOP, NVT)) {
+ if (SDValue Result = TLI.expandCTPOP(N, DAG)) {
+ Result = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Result);
+ return Result;
+ }
+ }
+
// Zero extend to the promoted type and do the count or parity there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op);
@@ -550,6 +619,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
SDLoc dl(N);
+
+ // If the larger CTTZ isn't supported by the target, try to expand now.
+ // If we expand later we'll end up with more operations since we lost the
+ // original type. Don't expand if we can use CTPOP or CTLZ expansion on the
+ // larger type.
+ if (!OVT.isVector() && TLI.isTypeLegal(NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTTZ, NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTTZ_ZERO_UNDEF, NVT) &&
+ !TLI.isOperationLegal(ISD::CTPOP, NVT) &&
+ !TLI.isOperationLegal(ISD::CTLZ, NVT)) {
+ if (SDValue Result = TLI.expandCTTZ(N, DAG)) {
+ Result = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Result);
+ return Result;
+ }
+ }
+
if (N->getOpcode() == ISD::CTTZ) {
// The count is the same in the promoted type except if the original
// value was zero. This can be handled by setting the bit just off
@@ -702,11 +787,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ ExtType = ISD::EXTLOAD;
+
SDLoc dl(N);
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
N->getOffset(), N->getMask(), ExtPassThru,
N->getMemoryVT(), N->getMemOperand(),
- N->getAddressingMode(), ISD::EXTLOAD);
+ N->getAddressingMode(), ExtType,
+ N->isExpandingLoad());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -792,7 +882,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
unsigned NewBits = PromotedType.getScalarSizeInBits();
if (Opcode == ISD::UADDSAT) {
- APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits);
+ APInt MaxVal = APInt::getAllOnes(OldBits).zext(NewBits);
SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
SDValue Add =
DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);
@@ -806,7 +896,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
// Shift cannot use a min/max expansion, we can't detect overflow if all of
// the bits have been shifted out.
- if (IsShift || TLI.isOperationLegalOrCustom(Opcode, PromotedType)) {
+ if (IsShift || TLI.isOperationLegal(Opcode, PromotedType)) {
unsigned ShiftOp;
switch (Opcode) {
case ISD::SADDSAT:
@@ -1103,12 +1193,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
return DAG.getSExtOrTrunc(SetCC, dl, NVT);
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N, bool IsVP) {
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
- return DAG.getNode(ISD::SHL, SDLoc(N), LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
@@ -1117,30 +1210,36 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
Op.getValueType(), Op, N->getOperand(1));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N, bool IsVP) {
// The input may have strange things in the top bits of the registers, but
// these operations don't care. They may have weird bits going out, but
// that too is okay if they are integer operations.
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = GetPromotedInteger(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N, bool IsVP) {
// Sign extend the input.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = SExtPromotedInteger(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N, bool IsVP) {
// Zero extend the input.
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) {
@@ -1152,22 +1251,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) {
LHS.getValueType(), LHS, RHS);
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N, bool IsVP) {
// The input value must be properly sign extended.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
- return DAG.getNode(ISD::SRA, SDLoc(N), LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N, bool IsVP) {
// The input value must be properly zero extended.
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
- return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {
@@ -1383,7 +1488,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
if (N->getOpcode() == ISD::UMULO) {
// Unsigned overflow occurred if the high part is non-zero.
unsigned Shift = SmallVT.getScalarSizeInBits();
- EVT ShiftTy = getShiftAmountTyForConstant(Mul.getValueType(), TLI, DAG);
+ EVT ShiftTy = TLI.getShiftAmountTy(Mul.getValueType(), DAG.getDataLayout());
SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
DAG.getConstant(Shift, DL, ShiftTy));
Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
@@ -1523,6 +1628,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::INSERT_SUBVECTOR: Res = PromoteIntOp_INSERT_SUBVECTOR(N); break;
case ISD::SHL:
case ISD::SRA:
@@ -1560,6 +1666,17 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN: Res = PromoteIntOp_VECREDUCE(N); break;
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ Res = PromoteIntOp_VP_REDUCE(N, OpNo);
+ break;
case ISD::SET_ROUNDING: Res = PromoteIntOp_SET_ROUNDING(N); break;
}
@@ -1605,10 +1722,8 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
// If the width of OpL/OpR excluding the duplicated sign bits is no greater
// than the width of NewLHS/NewRH, we can avoid inserting real truncate
// instruction, which is redundant eventually.
- unsigned OpLEffectiveBits =
- OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1;
- unsigned OpREffectiveBits =
- OpR.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1;
+ unsigned OpLEffectiveBits = DAG.ComputeMinSignedBits(OpL);
+ unsigned OpREffectiveBits = DAG.ComputeMinSignedBits(OpR);
if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() &&
OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) {
NewLHS = OpL;
@@ -1832,29 +1947,25 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
unsigned OpNo) {
-
SDValue DataOp = N->getValue();
- EVT DataVT = DataOp.getValueType();
SDValue Mask = N->getMask();
- SDLoc dl(N);
- bool TruncateStore = false;
if (OpNo == 4) {
+ // The Mask. Update in place.
+ EVT DataVT = DataOp.getValueType();
Mask = PromoteTargetBoolean(Mask, DataVT);
- // Update in place.
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
NewOps[4] = Mask;
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
- } else { // Data operand
- assert(OpNo == 1 && "Unexpected operand for promotion");
- DataOp = GetPromotedInteger(DataOp);
- TruncateStore = true;
}
- return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(),
+ assert(OpNo == 1 && "Unexpected operand for promotion");
+ DataOp = GetPromotedInteger(DataOp);
+
+ return DAG.getMaskedStore(N->getChain(), SDLoc(N), DataOp, N->getBasePtr(),
N->getOffset(), Mask, N->getMemoryVT(),
N->getMemOperand(), N->getAddressingMode(),
- TruncateStore, N->isCompressingStore());
+ /*IsTruncating*/ true, N->isCompressingStore());
}
SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
@@ -2023,30 +2134,54 @@ SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) {
return SDValue();
}
-SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
- SDLoc dl(N);
- SDValue Op;
+static unsigned getExtendForIntVecReduction(SDNode *N) {
switch (N->getOpcode()) {
- default: llvm_unreachable("Expected integer vector reduction");
+ default:
+ llvm_unreachable("Expected integer vector reduction");
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
- Op = GetPromotedInteger(N->getOperand(0));
- break;
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ return ISD::ANY_EXTEND;
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
- Op = SExtPromotedInteger(N->getOperand(0));
- break;
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ return ISD::SIGN_EXTEND;
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
- Op = ZExtPromotedInteger(N->getOperand(0));
- break;
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ return ISD::ZERO_EXTEND;
}
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOpVectorReduction(SDNode *N, SDValue V) {
+ switch (getExtendForIntVecReduction(N)) {
+ default:
+ llvm_unreachable("Impossible extension kind for integer reduction");
+ case ISD::ANY_EXTEND:
+ return GetPromotedInteger(V);
+ case ISD::SIGN_EXTEND:
+ return SExtPromotedInteger(V);
+ case ISD::ZERO_EXTEND:
+ return ZExtPromotedInteger(V);
+ }
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Op = PromoteIntOpVectorReduction(N, N->getOperand(0));
EVT EltVT = Op.getValueType().getVectorElementType();
EVT VT = N->getValueType(0);
+
if (VT.bitsGE(EltVT))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op);
@@ -2056,6 +2191,38 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo) {
+ SDLoc DL(N);
+ SDValue Op = N->getOperand(OpNo);
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+
+ if (OpNo == 2) { // Mask
+ // Update in place.
+ NewOps[2] = PromoteTargetBoolean(Op, N->getOperand(1).getValueType());
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+ }
+
+ assert(OpNo == 1 && "Unexpected operand for promotion");
+
+ Op = PromoteIntOpVectorReduction(N, Op);
+
+ NewOps[OpNo] = Op;
+
+ EVT VT = N->getValueType(0);
+ EVT EltVT = Op.getValueType().getScalarType();
+
+ if (VT.bitsGE(EltVT))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, NewOps);
+
+ // Result size must be >= element/start-value size. If this is not the case
+ // after promotion, also promote both the start value and result type and
+ // then truncate.
+ NewOps[0] =
+ DAG.getNode(getExtendForIntVecReduction(N), DL, EltVT, N->getOperand(0));
+ SDValue Reduce = DAG.getNode(N->getOpcode(), DL, EltVT, NewOps);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Reduce);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_SET_ROUNDING(SDNode *N) {
SDValue Op = ZExtPromotedInteger(N->getOperand(1));
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);
@@ -2088,6 +2255,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
report_fatal_error("Do not know how to expand the result of this "
"operator!");
+ case ISD::ARITH_FENCE: SplitRes_ARITH_FENCE(N, Lo, Hi); break;
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
@@ -2978,7 +3146,7 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
bool HasAddCarry = TLI.isOperationLegalOrCustom(
ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
if (HasAddCarry) {
- EVT ShiftAmtTy = getShiftAmountTyForConstant(NVT, TLI, DAG);
+ EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
SDValue Sign =
DAG.getNode(ISD::SRA, dl, NVT, Hi,
DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy));
@@ -3087,6 +3255,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
Op = GetSoftPromotedHalf(Op);
Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
+ Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
+ SplitInteger(Op, Lo, Hi);
+ return;
}
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
@@ -3116,6 +3287,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
Op = GetSoftPromotedHalf(Op);
Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
+ Op = DAG.getNode(ISD::FP_TO_UINT, dl, VT, Op);
+ SplitInteger(Op, Lo, Hi);
+ return;
}
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
@@ -3367,11 +3541,6 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);
EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
- if (APInt::getMaxValue(ShiftAmtTy.getSizeInBits()).ult(HalfBits)) {
- // The type from TLI is too small to fit the shift amount we want.
- // Override it with i32. The shift will have to be legalized.
- ShiftAmtTy = MVT::i32;
- }
SDValue Shift = DAG.getConstant(HalfBits, dl, ShiftAmtTy);
SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);
SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);
@@ -3464,8 +3633,11 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
SDValue Zero = DAG.getConstant(0, dl, VT);
- SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
- Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+ // Xor the inputs, if resulting sign bit is 0 the product will be
+ // positive, else negative.
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
Result = DAG.getSelect(dl, VT, Overflow, Result, Product);
} else {
// For unsigned multiplication, we only need to check the max since we
@@ -3638,7 +3810,7 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
// Saturate to signed maximum.
APInt MaxHi = APInt::getSignedMaxValue(NVTSize);
- APInt MaxLo = APInt::getAllOnesValue(NVTSize);
+ APInt MaxLo = APInt::getAllOnes(NVTSize);
Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxHi, dl, NVT), Hi);
Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxLo, dl, NVT), Lo);
// Saturate to signed minimum.
@@ -3808,9 +3980,6 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
// the new SHL_PARTS operation would need further legalization.
SDValue ShiftOp = N->getOperand(1);
EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- assert(ShiftTy.getScalarSizeInBits() >=
- Log2_32_Ceil(VT.getScalarSizeInBits()) &&
- "ShiftAmountTy is too small to cover the range of this type!");
if (ShiftOp.getValueType() != ShiftTy)
ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy);
@@ -3857,7 +4026,10 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
}
if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
- SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ EVT ShAmtTy =
+ EVT::getIntegerVT(*DAG.getContext(), DAG.getLibInfo().getIntSize());
+ SDValue ShAmt = DAG.getZExtOrTrunc(N->getOperand(1), dl, ShAmtTy);
+ SDValue Ops[2] = {N->getOperand(0), ShAmt};
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(isSigned);
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
@@ -4035,7 +4207,25 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
LC = RTLIB::MULO_I64;
else if (VT == MVT::i128)
LC = RTLIB::MULO_I128;
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!");
+
+ if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
+ // FIXME: This is not an optimal expansion, but better than crashing.
+ EVT WideVT =
+ EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
+ SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(0));
+ SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(1));
+ SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+ SDValue MulLo, MulHi;
+ SplitInteger(Mul, MulLo, MulHi);
+ SDValue SRA =
+ DAG.getNode(ISD::SRA, dl, VT, MulLo,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, VT));
+ SDValue Overflow =
+ DAG.getSetCC(dl, N->getValueType(1), MulHi, SRA, ISD::SETNE);
+ SplitInteger(MulLo, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return;
+ }
SDValue Temp = DAG.CreateStackTemporary(PtrVT);
// Temporary for the overflow value, default it to zero.
@@ -4188,18 +4378,45 @@ void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_Rotate(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- // Lower the rotate to shifts and ORs which can be expanded.
- SDValue Res;
- TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG);
+ // Delegate to funnel-shift expansion.
+ SDLoc DL(N);
+ unsigned Opcode = N->getOpcode() == ISD::ROTL ? ISD::FSHL : ISD::FSHR;
+ SDValue Res = DAG.getNode(Opcode, DL, N->getValueType(0), N->getOperand(0),
+ N->getOperand(0), N->getOperand(1));
SplitInteger(Res, Lo, Hi);
}
-void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N,
- SDValue &Lo, SDValue &Hi) {
- // Lower the funnel shift to shifts and ORs which can be expanded.
- SDValue Res;
- TLI.expandFunnelShift(N, Res, DAG);
- SplitInteger(Res, Lo, Hi);
+void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Values numbered from least significant to most significant.
+ SDValue In1, In2, In3, In4;
+ GetExpandedInteger(N->getOperand(0), In3, In4);
+ GetExpandedInteger(N->getOperand(1), In1, In2);
+ EVT HalfVT = In1.getValueType();
+
+ SDLoc DL(N);
+ unsigned Opc = N->getOpcode();
+ SDValue ShAmt = N->getOperand(2);
+ EVT ShAmtVT = ShAmt.getValueType();
+ EVT ShAmtCCVT = getSetCCResultType(ShAmtVT);
+
+ // If the shift amount is at least half the bitwidth, swap the inputs.
+ unsigned HalfVTBits = HalfVT.getScalarSizeInBits();
+ SDValue AndNode = DAG.getNode(ISD::AND, DL, ShAmtVT, ShAmt,
+ DAG.getConstant(HalfVTBits, DL, ShAmtVT));
+ SDValue Cond =
+ DAG.getSetCC(DL, ShAmtCCVT, AndNode, DAG.getConstant(0, DL, ShAmtVT),
+ Opc == ISD::FSHL ? ISD::SETNE : ISD::SETEQ);
+
+ // Expand to a pair of funnel shifts.
+ EVT NewShAmtVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
+ SDValue NewShAmt = DAG.getAnyExtOrTrunc(ShAmt, DL, NewShAmtVT);
+
+ SDValue Select1 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In1, In2);
+ SDValue Select2 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In2, In3);
+ SDValue Select3 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In3, In4);
+ Lo = DAG.getNode(Opc, DL, HalfVT, Select2, Select1, NewShAmt);
+ Hi = DAG.getNode(Opc, DL, HalfVT, Select3, Select2, NewShAmt);
}
void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo,
@@ -4297,7 +4514,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
if (RHSLo == RHSHi) {
if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {
- if (RHSCST->isAllOnesValue()) {
+ if (RHSCST->isAllOnes()) {
// Equality comparison to -1.
NewLHS = DAG.getNode(ISD::AND, dl,
LHSLo.getValueType(), LHSLo, LHSHi);
@@ -4317,8 +4534,8 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
// If this is a comparison of the sign bit, just look at the top part.
// X > -1, x < 0
if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS))
- if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0
- (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1
+ if ((CCCode == ISD::SETLT && CST->isZero()) || // X < 0
+ (CCCode == ISD::SETGT && CST->isAllOnes())) { // X > -1
NewLHS = LHSHi;
NewRHS = RHSHi;
return;
@@ -4369,9 +4586,11 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
bool EqAllowed = (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
CCCode == ISD::SETUGE || CCCode == ISD::SETULE);
- if ((EqAllowed && (HiCmpC && HiCmpC->isNullValue())) ||
- (!EqAllowed && ((HiCmpC && (HiCmpC->getAPIntValue() == 1)) ||
- (LoCmpC && LoCmpC->isNullValue())))) {
+  // FIXME: Is the HiCmpC->isOne() here correct for
+  // ZeroOrNegativeOneBooleanContent?
+ if ((EqAllowed && (HiCmpC && HiCmpC->isZero())) ||
+ (!EqAllowed &&
+ ((HiCmpC && HiCmpC->isOne()) || (LoCmpC && LoCmpC->isZero())))) {
// For LE / GE, if high part is known false, ignore the low part.
// For LT / GT: if low part is known false, return the high part.
// if high part is known true, ignore the low part.
@@ -4706,6 +4925,30 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue InOp0 = N->getOperand(0);
EVT InVT = InOp0.getValueType();
+ // Try and extract from a smaller type so that it eventually falls
+ // into the promotion code below.
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector ||
+ getTypeAction(InVT) == TargetLowering::TypeLegal) {
+ EVT NInVT = InVT.getHalfNumVectorElementsVT(*DAG.getContext());
+ unsigned NElts = NInVT.getVectorMinNumElements();
+ uint64_t IdxVal = cast<ConstantSDNode>(BaseIdx)->getZExtValue();
+
+ SDValue Step1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NInVT, InOp0,
+ DAG.getConstant(alignDown(IdxVal, NElts), dl,
+ BaseIdx.getValueType()));
+ SDValue Step2 = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, OutVT, Step1,
+ DAG.getConstant(IdxVal % NElts, dl, BaseIdx.getValueType()));
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Step2);
+ }
+
+ // Try and extract from a widened type.
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ SDValue Ops[] = {GetWidenedVector(InOp0), BaseIdx};
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), OutVT, Ops);
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Ext);
+ }
+
// Promote operands and see if this is handled by target lowering,
// Otherwise, use the BUILD_VECTOR approach below
if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) {
@@ -4873,11 +5116,46 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ unsigned NumOperands = N->getNumOperands();
+ unsigned NumOutElem = NOutVT.getVectorMinNumElements();
EVT OutElemTy = NOutVT.getVectorElementType();
+ if (OutVT.isScalableVector()) {
+ // Find the largest promoted element type for each of the operands.
+ SDUse *MaxSizedValue = std::max_element(
+ N->op_begin(), N->op_end(), [](const SDValue &A, const SDValue &B) {
+ EVT AVT = A.getValueType().getVectorElementType();
+ EVT BVT = B.getValueType().getVectorElementType();
+ return AVT.getScalarSizeInBits() < BVT.getScalarSizeInBits();
+ });
+ EVT MaxElementVT = MaxSizedValue->getValueType().getVectorElementType();
+
+ // Then promote all vectors to the largest element type.
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned I = 0; I < NumOperands; ++I) {
+ SDValue Op = N->getOperand(I);
+ EVT OpVT = Op.getValueType();
+ if (getTypeAction(OpVT) == TargetLowering::TypePromoteInteger)
+ Op = GetPromotedInteger(Op);
+ else
+ assert(getTypeAction(OpVT) == TargetLowering::TypeLegal &&
+ "Unhandled legalization type");
+
+ if (OpVT.getVectorElementType().getScalarSizeInBits() <
+ MaxElementVT.getScalarSizeInBits())
+ Op = DAG.getAnyExtOrTrunc(Op, dl,
+ OpVT.changeVectorElementType(MaxElementVT));
+ Ops.push_back(Op);
+ }
+
+ // Do the CONCAT on the promoted type and finally truncate to (the promoted)
+ // NOutVT.
+ return DAG.getAnyExtOrTrunc(
+ DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ OutVT.changeVectorElementType(MaxElementVT), Ops),
+ dl, NOutVT);
+ }
unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements();
- unsigned NumOutElem = NOutVT.getVectorNumElements();
- unsigned NumOperands = N->getNumOperands();
assert(NumElem * NumOperands == NumOutElem &&
"Unexpected number of elements");
@@ -4957,7 +5235,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECREDUCE(SDNode *N) {
// we can simply change the result type.
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->ops());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VP_REDUCE(SDNode *N) {
+ // The VP_REDUCE result size may be larger than the element size, so we can
+ // simply change the result type. However the start value and result must be
+ // the same.
+ SDLoc DL(N);
+ SDValue Start = PromoteIntOpVectorReduction(N, N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), DL, Start.getValueType(), Start,
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
}
SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
@@ -4974,6 +5262,21 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
}
+SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_SUBVECTOR(SDNode *N) {
+ SDLoc dl(N);
+ // The result type is equal to the first input operand's type, so the
+ // type that needs promoting must be the second source vector.
+ SDValue V0 = N->getOperand(0);
+ SDValue V1 = GetPromotedInteger(N->getOperand(1));
+ SDValue Idx = N->getOperand(2);
+ EVT PromVT = EVT::getVectorVT(*DAG.getContext(),
+ V1.getValueType().getVectorElementType(),
+ V0.getValueType().getVectorElementCount());
+ V0 = DAG.getAnyExtOrTrunc(V0, dl, PromVT);
+ SDValue Ext = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, PromVT, V0, V1, Idx);
+ return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) {
SDLoc dl(N);
SDValue V0 = GetPromotedInteger(N->getOperand(0));
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 05a974af3b55..1f73c9eea104 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -223,8 +223,7 @@ bool DAGTypeLegalizer::run() {
#endif
PerformExpensiveChecks();
- SDNode *N = Worklist.back();
- Worklist.pop_back();
+ SDNode *N = Worklist.pop_back_val();
assert(N->getNodeId() == ReadyToProcess &&
"Node should be ready if on worklist!");
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 8d17d8fc68b1..da282ecad282 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -289,6 +289,12 @@ private:
return DAG.getZeroExtendInReg(Op, DL, OldVT);
}
+ // Promote the given operand V (vector or scalar) according to N's specific
+ // reduction kind. N must be an integer VECREDUCE_* or VP_REDUCE_*. Returns
+  // the promoted value, extended via the nominal extension opcode
+  // (ISD::(ANY|ZERO|SIGN)_EXTEND) appropriate for the reduction kind.
+ SDValue PromoteIntOpVectorReduction(SDNode *N, SDValue V);
+
// Integer Result Promotion.
void PromoteIntegerResult(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
@@ -332,14 +338,14 @@ private:
SDValue PromoteIntRes_VSELECT(SDNode *N);
SDValue PromoteIntRes_SELECT_CC(SDNode *N);
SDValue PromoteIntRes_SETCC(SDNode *N);
- SDValue PromoteIntRes_SHL(SDNode *N);
- SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
- SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N);
- SDValue PromoteIntRes_SExtIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_SHL(SDNode *N, bool IsVP);
+ SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N, bool IsVP);
+ SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N, bool IsVP);
+ SDValue PromoteIntRes_SExtIntBinOp(SDNode *N, bool IsVP);
SDValue PromoteIntRes_UMINUMAX(SDNode *N);
SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
- SDValue PromoteIntRes_SRA(SDNode *N);
- SDValue PromoteIntRes_SRL(SDNode *N);
+ SDValue PromoteIntRes_SRA(SDNode *N, bool IsVP);
+ SDValue PromoteIntRes_SRL(SDNode *N, bool IsVP);
SDValue PromoteIntRes_TRUNCATE(SDNode *N);
SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);
@@ -353,6 +359,7 @@ private:
SDValue PromoteIntRes_DIVFIX(SDNode *N);
SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
SDValue PromoteIntRes_VECREDUCE(SDNode *N);
+ SDValue PromoteIntRes_VP_REDUCE(SDNode *N);
SDValue PromoteIntRes_ABS(SDNode *N);
SDValue PromoteIntRes_Rotate(SDNode *N);
SDValue PromoteIntRes_FunnelShift(SDNode *N);
@@ -369,6 +376,7 @@ private:
SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue PromoteIntOp_INSERT_SUBVECTOR(SDNode *N);
SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N);
@@ -394,6 +402,7 @@ private:
SDValue PromoteIntOp_FIX(SDNode *N);
SDValue PromoteIntOp_FPOWI(SDNode *N);
SDValue PromoteIntOp_VECREDUCE(SDNode *N);
+ SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SET_ROUNDING(SDNode *N);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -518,6 +527,7 @@ private:
SDValue SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC);
SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC);
SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_ARITH_FENCE(SDNode *N);
SDValue SoftenFloatRes_BITCAST(SDNode *N);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
SDValue SoftenFloatRes_ConstantFP(SDNode *N);
@@ -816,7 +826,7 @@ private:
// Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
void SplitVectorResult(SDNode *N, unsigned ResNo);
- void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi, bool IsVP);
void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -898,6 +908,7 @@ private:
SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N);
SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+ SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
@@ -912,7 +923,7 @@ private:
SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
SDValue WidenVecRes_Ternary(SDNode *N);
- SDValue WidenVecRes_Binary(SDNode *N);
+ SDValue WidenVecRes_Binary(SDNode *N, bool IsVP);
SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N);
SDValue WidenVecRes_StrictFP(SDNode *N);
@@ -972,10 +983,10 @@ private:
LoadSDNode *LD, ISD::LoadExtType ExtType);
/// Helper function to generate a set of stores to store a widen vector into
- /// non-widen memory.
+ /// non-widen memory. Returns true if successful, false otherwise.
/// StChain: list of chains for the stores we have generated
/// ST: store of a widen value
- void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);
+ bool GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);
/// Modifies a vector input (widen or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
@@ -1011,6 +1022,7 @@ private:
// Generic Result Splitting.
void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
SDValue &Lo, SDValue &Hi);
+ void SplitRes_ARITH_FENCE (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 81cc2bf10d25..3d3c9a2ad837 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -571,3 +571,13 @@ void DAGTypeLegalizer::SplitRes_FREEZE(SDNode *N, SDValue &Lo, SDValue &Hi) {
Lo = DAG.getNode(ISD::FREEZE, dl, L.getValueType(), L);
Hi = DAG.getNode(ISD::FREEZE, dl, H.getValueType(), H);
}
+
+void DAGTypeLegalizer::SplitRes_ARITH_FENCE(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue L, H;
+ SDLoc DL(N);
+ GetSplitOp(N->getOperand(0), L, H);
+
+ Lo = DAG.getNode(ISD::ARITH_FENCE, DL, L.getValueType(), L);
+ Hi = DAG.getNode(ISD::ARITH_FENCE, DL, H.getValueType(), H);
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index ebe3bfc4b75a..88a28a3be53e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -538,8 +538,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return RecursivelyLegalizeResults(Op, ResultVals);
}
-// FIXME: This is very similar to the X86 override of
-// TargetLowering::LowerOperationWrapper. Can we merge them somehow?
+// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
+// merge them somehow?
bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
@@ -774,8 +774,8 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
ExpandSETCC(Node, Results);
return;
case ISD::ABS:
- if (TLI.expandABS(Node, Tmp, DAG)) {
- Results.push_back(Tmp);
+ if (SDValue Expanded = TLI.expandABS(Node, DAG)) {
+ Results.push_back(Expanded);
return;
}
break;
@@ -783,22 +783,22 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
ExpandBITREVERSE(Node, Results);
return;
case ISD::CTPOP:
- if (TLI.expandCTPOP(Node, Tmp, DAG)) {
- Results.push_back(Tmp);
+ if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) {
+ Results.push_back(Expanded);
return;
}
break;
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
- if (TLI.expandCTLZ(Node, Tmp, DAG)) {
- Results.push_back(Tmp);
+ if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) {
+ Results.push_back(Expanded);
return;
}
break;
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- if (TLI.expandCTTZ(Node, Tmp, DAG)) {
- Results.push_back(Tmp);
+ if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) {
+ Results.push_back(Expanded);
return;
}
break;
@@ -943,10 +943,8 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
// What is the size of each element in the vector mask.
EVT BitTy = MaskTy.getScalarType();
- Mask = DAG.getSelect(DL, BitTy, Mask,
- DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL,
- BitTy),
- DAG.getConstant(0, DL, BitTy));
+ Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getAllOnesConstant(DL, BitTy),
+ DAG.getConstant(0, DL, BitTy));
// Broadcast the mask so that the entire vector is all one or all zero.
if (VT.isFixedLengthVector())
@@ -960,9 +958,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
- SDValue AllOnes = DAG.getConstant(
- APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy);
- SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
+ SDValue NotMask = DAG.getNOT(DL, Mask, MaskTy);
Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
@@ -1099,25 +1095,45 @@ static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
EVT VT = Node->getValueType(0);
+ // Scalable vectors can't use shuffle expansion.
+ if (VT.isScalableVector())
+ return TLI.expandBSWAP(Node, DAG);
+
// Generate a byte wise shuffle mask for the BSWAP.
SmallVector<int, 16> ShuffleMask;
createBSWAPShuffleMask(VT, ShuffleMask);
EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
// Only emit a shuffle if the mask is legal.
- if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
- return DAG.UnrollVectorOp(Node);
+ if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {
+ SDLoc DL(Node);
+ SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
+ Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
- SDLoc DL(Node);
- SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
- Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
- return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ // If we have the appropriate vector bit operations, it is better to use them
+ // than unrolling and expanding each component.
+ if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
+ return TLI.expandBSWAP(Node, DAG);
+
+ // Otherwise unroll.
+ return DAG.UnrollVectorOp(Node);
}
void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
EVT VT = Node->getValueType(0);
+ // We can't unroll or use shuffles for scalable vectors.
+ if (VT.isScalableVector()) {
+ Results.push_back(TLI.expandBITREVERSE(Node, DAG));
+ return;
+ }
+
// If we have the scalar operation, it's probably cheaper to unroll it.
if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) {
SDValue Tmp = DAG.UnrollVectorOp(Node);
@@ -1156,9 +1172,10 @@ void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,
if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
- TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
- // Let LegalizeDAG handle this later.
+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) {
+ Results.push_back(TLI.expandBITREVERSE(Node, DAG));
return;
+ }
// Otherwise unroll.
SDValue Tmp = DAG.UnrollVectorOp(Node);
@@ -1207,9 +1224,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
- SDValue AllOnes = DAG.getConstant(
- APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT);
- SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
+ SDValue NotMask = DAG.getNOT(DL, Mask, VT);
Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
@@ -1502,9 +1517,8 @@ void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
if (Node->getOpcode() == ISD::STRICT_FSETCC ||
Node->getOpcode() == ISD::STRICT_FSETCCS)
ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult,
- DAG.getConstant(APInt::getAllOnesValue
- (EltVT.getSizeInBits()), dl, EltVT),
- DAG.getConstant(0, dl, EltVT));
+ DAG.getAllOnesConstant(dl, EltVT),
+ DAG.getConstant(0, dl, EltVT));
OpValues.push_back(ScalarResult);
OpChains.push_back(ScalarChain);
@@ -1536,9 +1550,7 @@ SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
TLI.getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), TmpEltVT),
LHSElem, RHSElem, CC);
- Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
- DAG.getConstant(APInt::getAllOnesValue
- (EltVT.getSizeInBits()), dl, EltVT),
+ Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getAllOnesConstant(dl, EltVT),
DAG.getConstant(0, dl, EltVT));
}
return DAG.getBuildVector(VT, dl, Ops);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 91242bbf866f..539c9cb9c256 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -529,7 +529,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
SDValue Arg = N->getOperand(2).getOperand(0);
if (Arg.isUndef())
return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
- unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue();
+ unsigned Op = !cast<ConstantSDNode>(Arg)->isZero();
return GetScalarizedVector(N->getOperand(Op));
}
@@ -1045,7 +1045,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::USHLSAT:
case ISD::ROTL:
case ISD::ROTR:
- SplitVecRes_BinOp(N, Lo, Hi);
+ SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ false);
break;
case ISD::FMA:
case ISD::FSHL:
@@ -1082,6 +1082,26 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UDIVFIXSAT:
SplitVecRes_FIX(N, Lo, Hi);
break;
+ case ISD::VP_ADD:
+ case ISD::VP_AND:
+ case ISD::VP_MUL:
+ case ISD::VP_OR:
+ case ISD::VP_SUB:
+ case ISD::VP_XOR:
+ case ISD::VP_SHL:
+ case ISD::VP_LSHR:
+ case ISD::VP_ASHR:
+ case ISD::VP_SDIV:
+ case ISD::VP_UDIV:
+ case ISD::VP_SREM:
+ case ISD::VP_UREM:
+ case ISD::VP_FADD:
+ case ISD::VP_FSUB:
+ case ISD::VP_FMUL:
+ case ISD::VP_FDIV:
+ case ISD::VP_FREM:
+ SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ true);
+ break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1113,8 +1133,8 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
}
}
-void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi,
+ bool IsVP) {
SDValue LHSLo, LHSHi;
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
SDValue RHSLo, RHSHi;
@@ -1123,8 +1143,41 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
const SDNodeFlags Flags = N->getFlags();
unsigned Opcode = N->getOpcode();
- Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
- Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
+ if (!IsVP) {
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
+ return;
+ }
+
+ // Split the mask.
+ SDValue MaskLo, MaskHi;
+ SDValue Mask = N->getOperand(2);
+ EVT MaskVT = Mask.getValueType();
+ if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, SDLoc(Mask));
+
+ // Split the vector length parameter.
+ // %evl -> umin(%evl, %halfnumelts) and usubsat(%evl - %halfnumelts).
+ SDValue EVL = N->getOperand(3);
+ EVT VecVT = N->getValueType(0);
+ EVT EVLVT = EVL.getValueType();
+ assert(VecVT.getVectorElementCount().isKnownEven() &&
+ "Expecting the mask to be an evenly-sized vector");
+ unsigned HalfMinNumElts = VecVT.getVectorMinNumElements() / 2;
+ SDValue HalfNumElts =
+ VecVT.isFixedLengthVector()
+ ? DAG.getConstant(HalfMinNumElts, dl, EVLVT)
+ : DAG.getVScale(dl, EVLVT,
+ APInt(EVLVT.getScalarSizeInBits(), HalfMinNumElts));
+ SDValue EVLLo = DAG.getNode(ISD::UMIN, dl, EVLVT, EVL, HalfNumElts);
+ SDValue EVLHi = DAG.getNode(ISD::USUBSAT, dl, EVLVT, EVL, HalfNumElts);
+
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(),
+ {LHSLo, RHSLo, MaskLo, EVLLo}, Flags);
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(),
+ {LHSHi, RHSHi, MaskHi, EVLHi}, Flags);
}
void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
@@ -2985,6 +3038,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
+ case ISD::INSERT_SUBVECTOR:
+ Res = WidenVecRes_INSERT_SUBVECTOR(N);
+ break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
@@ -3035,7 +3091,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::USHLSAT:
case ISD::ROTL:
case ISD::ROTR:
- Res = WidenVecRes_Binary(N);
+ Res = WidenVecRes_Binary(N, /*IsVP*/ false);
break;
case ISD::FADD:
@@ -3159,6 +3215,31 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FSHR:
Res = WidenVecRes_Ternary(N);
break;
+ case ISD::VP_ADD:
+ case ISD::VP_AND:
+ case ISD::VP_MUL:
+ case ISD::VP_OR:
+ case ISD::VP_SUB:
+ case ISD::VP_XOR:
+ case ISD::VP_SHL:
+ case ISD::VP_LSHR:
+ case ISD::VP_ASHR:
+ case ISD::VP_SDIV:
+ case ISD::VP_UDIV:
+ case ISD::VP_SREM:
+ case ISD::VP_UREM:
+ case ISD::VP_FADD:
+ case ISD::VP_FSUB:
+ case ISD::VP_FMUL:
+ case ISD::VP_FDIV:
+ case ISD::VP_FREM:
+ // Vector-predicated binary op widening. Note that -- unlike the
+ // unpredicated versions -- we don't have to worry about trapping on
+ // operations like UDIV, FADD, etc., as we pass on the original vector
+ // length parameter. This means the widened elements containing garbage
+ // aren't active.
+ Res = WidenVecRes_Binary(N, /*IsVP*/ true);
+ break;
}
// If Res is null, the sub-method took care of registering the result.
@@ -3176,13 +3257,31 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);
}
-SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N, bool IsVP) {
// Binary op widening.
SDLoc dl(N);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags());
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2,
+ N->getFlags());
+ // For VP operations, we must also widen the mask. Note that the mask type
+  // may not actually need widening, leading it to be split along with the VP
+ // operation.
+ // FIXME: This could lead to an infinite split/widen loop. We only handle the
+ // case where the mask needs widening to an identically-sized type as the
+ // vector inputs.
+ SDValue Mask = N->getOperand(2);
+ assert(getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen binary VP op");
+ Mask = GetWidenedVector(Mask);
+ assert(Mask.getValueType().getVectorElementCount() ==
+ WidenVT.getVectorElementCount() &&
+ "Unable to widen binary VP op");
+ return DAG.getNode(N->getOpcode(), dl, WidenVT,
+ {InOp1, InOp2, Mask, N->getOperand(3)}, N->getFlags());
}
SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) {
@@ -3527,7 +3626,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDLoc DL(N);
EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0));
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ ElementCount WidenEC = WidenVT.getVectorElementCount();
EVT InVT = InOp.getValueType();
@@ -3547,14 +3646,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
}
EVT InEltVT = InVT.getVectorElementType();
- EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenNumElts);
- unsigned InVTNumElts = InVT.getVectorNumElements();
+ EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenEC);
+ ElementCount InVTEC = InVT.getVectorElementCount();
if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
InOp = GetWidenedVector(N->getOperand(0));
InVT = InOp.getValueType();
- InVTNumElts = InVT.getVectorNumElements();
- if (InVTNumElts == WidenNumElts) {
+ InVTEC = InVT.getVectorElementCount();
+ if (InVTEC == WidenEC) {
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InOp);
return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
@@ -3578,9 +3677,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// it an illegal type that might lead to repeatedly splitting the input
// and then widening it. To avoid this, we widen the input only if
// it results in a legal type.
- if (WidenNumElts % InVTNumElts == 0) {
+ if (WidenEC.isKnownMultipleOf(InVTEC.getKnownMinValue())) {
// Widen the input and call convert on the widened input vector.
- unsigned NumConcat = WidenNumElts/InVTNumElts;
+ unsigned NumConcat =
+ WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue();
SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
Ops[0] = InOp;
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
@@ -3589,7 +3689,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
}
- if (InVTNumElts % WidenNumElts == 0) {
+ if (InVTEC.isKnownMultipleOf(WidenEC.getKnownMinValue())) {
SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp,
DAG.getVectorIdxConstant(0, DL));
// Extract the input and convert the shorten input vector.
@@ -3601,7 +3701,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// Otherwise unroll into some nasty scalar code and rebuild the vector.
EVT EltVT = WidenVT.getVectorElementType();
- SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT));
// Use the original element count so we don't do more scalar opts than
// necessary.
unsigned MinElts = N->getValueType(0).getVectorNumElements();
@@ -3962,14 +4062,26 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
return DAG.getBuildVector(WidenVT, dl, Ops);
}
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_SUBVECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ SDLoc dl(N);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WidenVT, InOp1, InOp2, Idx);
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue InOp = N->getOperand(0);
SDValue Idx = N->getOperand(1);
SDLoc dl(N);
- if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+ auto InOpTypeAction = getTypeAction(InOp.getValueType());
+ if (InOpTypeAction == TargetLowering::TypeWidenVector)
InOp = GetWidenedVector(InOp);
EVT InVT = InOp.getValueType();
@@ -3979,20 +4091,49 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
if (IdxVal == 0 && InVT == WidenVT)
return InOp;
- if (VT.isScalableVector())
- report_fatal_error("Don't know how to widen the result of "
- "EXTRACT_SUBVECTOR for scalable vectors");
-
// Check if we can extract from the vector.
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
- unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
+ unsigned InNumElts = InVT.getVectorMinNumElements();
if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
+ if (VT.isScalableVector()) {
+ // Try to split the operation up into smaller extracts and concat the
+ // results together, e.g.
+ // nxv6i64 extract_subvector(nxv12i64, 6)
+ // <->
+ // nxv8i64 concat(
+ // nxv2i64 extract_subvector(nxv16i64, 6)
+ // nxv2i64 extract_subvector(nxv16i64, 8)
+ // nxv2i64 extract_subvector(nxv16i64, 10)
+ // undef)
+ unsigned VTNElts = VT.getVectorMinNumElements();
+ unsigned GCD = greatestCommonDivisor(VTNElts, WidenNumElts);
+ assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken "
+ "down type's element count");
+ EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ ElementCount::getScalable(GCD));
+ // Avoid recursion around e.g. nxv1i8.
+ if (getTypeAction(PartVT) != TargetLowering::TypeWidenVector) {
+ SmallVector<SDValue> Parts;
+ unsigned I = 0;
+ for (; I < VTNElts / GCD; ++I)
+ Parts.push_back(
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, InOp,
+ DAG.getVectorIdxConstant(IdxVal + I * GCD, dl)));
+ for (; I < WidenNumElts / GCD; ++I)
+ Parts.push_back(DAG.getUNDEF(PartVT));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
+ }
+
+ report_fatal_error("Don't know how to widen the result of "
+ "EXTRACT_SUBVECTOR for scalable vectors");
+ }
+
// We could try widening the input to the right length but for now, extract
// the original elements, fill the rest with undefs and build a vector.
SmallVector<SDValue, 16> Ops(WidenNumElts);
- EVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
unsigned i;
for (i = 0; i < NumElts; ++i)
@@ -4037,20 +4178,55 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
else
Result = GenWidenVectorLoads(LdChain, LD);
- // If we generate a single load, we can use that for the chain. Otherwise,
- // build a factor node to remember the multiple loads are independent and
- // chain to that.
- SDValue NewChain;
- if (LdChain.size() == 1)
- NewChain = LdChain[0];
- else
- NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
+ if (Result) {
+ // If we generate a single load, we can use that for the chain. Otherwise,
+ // build a factor node to remember the multiple loads are independent and
+ // chain to that.
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
- // Modified the chain - switch anything that used the old chain to use
- // the new one.
- ReplaceValueWith(SDValue(N, 1), NewChain);
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewChain);
- return Result;
+ return Result;
+ }
+
+ // Generate a vector-predicated load if it is custom/legal on the target. To
+ // avoid possible recursion, only do this if the widened mask type is legal.
+ // FIXME: Not all targets may support EVL in VP_LOAD. These will have been
+ // removed from the IR by the ExpandVectorPredication pass but we're
+ // reintroducing them here.
+ EVT LdVT = LD->getMemoryVT();
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), LdVT);
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ WideVT.getVectorElementCount());
+ if (ExtType == ISD::NON_EXTLOAD && WideVT.isScalableVector() &&
+ TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WideVT) &&
+ TLI.isTypeLegal(WideMaskVT)) {
+ SDLoc DL(N);
+ SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
+ MVT EVLVT = TLI.getVPExplicitVectorLengthTy();
+ unsigned NumVTElts = LdVT.getVectorMinNumElements();
+ SDValue EVL =
+ DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
+ const auto *MMO = LD->getMemOperand();
+ SDValue NewLoad =
+ DAG.getLoadVP(WideVT, DL, LD->getChain(), LD->getBasePtr(), Mask, EVL,
+ MMO->getPointerInfo(), MMO->getAlign(), MMO->getFlags(),
+ MMO->getAAInfo());
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1));
+
+ return NewLoad;
+ }
+
+ report_fatal_error("Unable to widen vector load");
}
SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
@@ -4351,7 +4527,7 @@ SDValue DAGTypeLegalizer::WidenVSELECTMask(SDNode *N) {
SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ ElementCount WidenEC = WidenVT.getVectorElementCount();
SDValue Cond1 = N->getOperand(0);
EVT CondVT = Cond1.getValueType();
@@ -4365,8 +4541,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
}
EVT CondEltVT = CondVT.getVectorElementType();
- EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
- CondEltVT, WidenNumElts);
+ EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenEC);
if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)
Cond1 = GetWidenedVector(Cond1);
@@ -4891,12 +5066,42 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
return TLI.scalarizeVectorStore(ST, DAG);
SmallVector<SDValue, 16> StChain;
- GenWidenVectorStores(StChain, ST);
+ if (GenWidenVectorStores(StChain, ST)) {
+ if (StChain.size() == 1)
+ return StChain[0];
- if (StChain.size() == 1)
- return StChain[0];
- else
return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
+ }
+
+ // Generate a vector-predicated store if it is custom/legal on the target.
+ // To avoid possible recursion, only do this if the widened mask type is
+ // legal.
+ // FIXME: Not all targets may support EVL in VP_STORE. These will have been
+ // removed from the IR by the ExpandVectorPredication pass but we're
+ // reintroducing them here.
+ SDValue StVal = ST->getValue();
+ EVT StVT = StVal.getValueType();
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StVT);
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ WideVT.getVectorElementCount());
+ if (WideVT.isScalableVector() &&
+ TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&
+ TLI.isTypeLegal(WideMaskVT)) {
+ // Widen the value.
+ SDLoc DL(N);
+ StVal = GetWidenedVector(StVal);
+ SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
+ MVT EVLVT = TLI.getVPExplicitVectorLengthTy();
+ unsigned NumVTElts = StVT.getVectorMinNumElements();
+ SDValue EVL =
+ DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
+ const auto *MMO = ST->getMemOperand();
+ return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(), Mask,
+ EVL, MMO->getPointerInfo(), MMO->getAlign(),
+ MMO->getFlags(), MMO->getAAInfo());
+ }
+
+ report_fatal_error("Unable to widen vector store");
}
SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
@@ -5147,9 +5352,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
// Align: If 0, don't allow use of a wider type
// WidenEx: If Align is not 0, the amount additional we can load/store from.
-static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
- unsigned Width, EVT WidenVT,
- unsigned Align = 0, unsigned WidenEx = 0) {
+static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI,
+ unsigned Width, EVT WidenVT,
+ unsigned Align = 0, unsigned WidenEx = 0) {
EVT WidenEltVT = WidenVT.getVectorElementType();
const bool Scalable = WidenVT.isScalableVector();
unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinSize();
@@ -5204,9 +5409,11 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
}
}
+ // Using element-wise loads and stores for widening operations is not
+ // supported for scalable vectors
if (Scalable)
- report_fatal_error("Using element-wise loads and stores for widening "
- "operations is not supported for scalable vectors");
+ return None;
+
return RetVT;
}
@@ -5266,32 +5473,63 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
TypeSize WidthDiff = WidenWidth - LdWidth;
// Allow wider loads if they are sufficiently aligned to avoid memory faults
// and if the original load is simple.
- unsigned LdAlign = (!LD->isSimple()) ? 0 : LD->getAlignment();
+ unsigned LdAlign =
+ (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlignment();
// Find the vector type that can load from.
- EVT NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
- WidthDiff.getKnownMinSize());
- TypeSize NewVTWidth = NewVT.getSizeInBits();
- SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
+ Optional<EVT> FirstVT =
+ findMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
+ WidthDiff.getKnownMinSize());
+
+ if (!FirstVT)
+ return SDValue();
+
+ SmallVector<EVT, 8> MemVTs;
+ TypeSize FirstVTWidth = FirstVT->getSizeInBits();
+
+ // Unless we're able to load in one instruction we must work out how to load
+ // the remainder.
+ if (!TypeSize::isKnownLE(LdWidth, FirstVTWidth)) {
+ Optional<EVT> NewVT = FirstVT;
+ TypeSize RemainingWidth = LdWidth;
+ TypeSize NewVTWidth = FirstVTWidth;
+ do {
+ RemainingWidth -= NewVTWidth;
+ if (TypeSize::isKnownLT(RemainingWidth, NewVTWidth)) {
+ // The current type we are using is too large. Find a better size.
+ NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinSize(), WidenVT,
+ LdAlign, WidthDiff.getKnownMinSize());
+ if (!NewVT)
+ return SDValue();
+ NewVTWidth = NewVT->getSizeInBits();
+ }
+ MemVTs.push_back(*NewVT);
+ } while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth));
+ }
+
+ SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction.
- if (TypeSize::isKnownLE(LdWidth, NewVTWidth)) {
- if (!NewVT.isVector()) {
- unsigned NumElts = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize();
- EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ if (MemVTs.empty()) {
+ assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
+ if (!FirstVT->isVector()) {
+ unsigned NumElts =
+ WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize();
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
}
- if (NewVT == WidenVT)
+ if (FirstVT == WidenVT)
return LdOp;
// TODO: We don't currently have any tests that exercise this code path.
- assert(WidenWidth.getFixedSize() % NewVTWidth.getFixedSize() == 0);
- unsigned NumConcat = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize();
+ assert(WidenWidth.getFixedSize() % FirstVTWidth.getFixedSize() == 0);
+ unsigned NumConcat =
+ WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize();
SmallVector<SDValue, 16> ConcatOps(NumConcat);
- SDValue UndefVal = DAG.getUNDEF(NewVT);
+ SDValue UndefVal = DAG.getUNDEF(*FirstVT);
ConcatOps[0] = LdOp;
for (unsigned i = 1; i != NumConcat; ++i)
ConcatOps[i] = UndefVal;
@@ -5304,28 +5542,22 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
uint64_t ScaledOffset = 0;
MachinePointerInfo MPI = LD->getPointerInfo();
- do {
- LdWidth -= NewVTWidth;
- IncrementPointer(cast<LoadSDNode>(LdOp), NewVT, MPI, BasePtr,
- &ScaledOffset);
- if (TypeSize::isKnownLT(LdWidth, NewVTWidth)) {
- // The current type we are using is too large. Find a better size.
- NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
- WidthDiff.getKnownMinSize());
- NewVTWidth = NewVT.getSizeInBits();
- }
+ // First increment past the first load.
+ IncrementPointer(cast<LoadSDNode>(LdOp), *FirstVT, MPI, BasePtr,
+ &ScaledOffset);
+ for (EVT MemVT : MemVTs) {
Align NewAlign = ScaledOffset == 0
? LD->getOriginalAlign()
: commonAlignment(LD->getAlign(), ScaledOffset);
SDValue L =
- DAG.getLoad(NewVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo);
- LdChain.push_back(L.getValue(1));
+ DAG.getLoad(MemVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo);
LdOps.push_back(L);
- LdOp = L;
- } while (TypeSize::isKnownGT(LdWidth, NewVTWidth));
+ LdChain.push_back(L.getValue(1));
+ IncrementPointer(cast<LoadSDNode>(L), MemVT, MPI, BasePtr, &ScaledOffset);
+ }
// Build the vector from the load operations.
unsigned End = LdOps.size();
@@ -5447,7 +5679,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
return DAG.getBuildVector(WidenVT, dl, Ops);
}
-void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
+bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
StoreSDNode *ST) {
// The strategy assumes that we can efficiently store power-of-two widths.
// The routine chops the vector into the largest vector stores with the same
@@ -5473,9 +5705,30 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
MachinePointerInfo MPI = ST->getPointerInfo();
uint64_t ScaledOffset = 0;
+
+ // A breakdown of how to widen this vector store. Each element of the vector
+ // is a memory VT combined with the number of times it is to be stored to,
+ // e.g., v5i32 -> {{v2i32,2},{i32,1}}
+ SmallVector<std::pair<EVT, unsigned>, 4> MemVTs;
+
while (StWidth.isNonZero()) {
// Find the largest vector type we can store with.
- EVT NewVT = FindMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT);
+ Optional<EVT> NewVT =
+ findMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT);
+ if (!NewVT)
+ return false;
+ MemVTs.push_back({*NewVT, 0});
+ TypeSize NewVTWidth = NewVT->getSizeInBits();
+
+ do {
+ StWidth -= NewVTWidth;
+ MemVTs.back().second++;
+ } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
+ }
+
+ for (const auto &Pair : MemVTs) {
+ EVT NewVT = Pair.first;
+ unsigned Count = Pair.second;
TypeSize NewVTWidth = NewVT.getSizeInBits();
if (NewVT.isVector()) {
@@ -5490,12 +5743,10 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
MMOFlags, AAInfo);
StChain.push_back(PartStore);
- StWidth -= NewVTWidth;
Idx += NumVTElts;
-
IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr,
&ScaledOffset);
- } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
+ } while (--Count);
} else {
// Cast the vector to the scalar type we can store.
unsigned NumElts = ValWidth.getFixedSize() / NewVTWidth.getFixedSize();
@@ -5511,13 +5762,14 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
MMOFlags, AAInfo);
StChain.push_back(PartStore);
- StWidth -= NewVTWidth;
IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr);
- } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
+ } while (--Count);
// Restore index back to be relative to the original widen element type.
Idx = Idx * NewVTWidth.getFixedSize() / ValEltWidth;
}
}
+
+ return true;
}
/// Modifies a vector input (widen or narrows) to a vector of NVT. The
diff --git a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 75b4242a415c..f64b332a7fef 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -192,7 +192,7 @@ public:
// Returns the SDNodes which this SDDbgValue depends on.
SmallVector<SDNode *> getSDNodes() const {
SmallVector<SDNode *> Dependencies;
- for (SDDbgOperand DbgOp : getLocationOps())
+ for (const SDDbgOperand &DbgOp : getLocationOps())
if (DbgOp.getKind() == SDDbgOperand::SDNODE)
Dependencies.push_back(DbgOp.getSDNode());
for (SDNode *Node : getAdditionalDependencies())
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 0022e5ec31f0..1b89864116cb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -56,9 +56,7 @@ namespace {
SUnit *pop() {
if (empty()) return nullptr;
- SUnit *V = Queue.back();
- Queue.pop_back();
- return V;
+ return Queue.pop_back_val();
}
};
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index b2a8c8bdd78c..95f7e43b151d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -384,13 +384,12 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// There are either zero or one users of the Glue result.
bool HasGlueUse = false;
- for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
- UI != E; ++UI)
- if (GlueVal.isOperandOf(*UI)) {
+ for (SDNode *U : N->uses())
+ if (GlueVal.isOperandOf(U)) {
HasGlueUse = true;
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
- N = *UI;
+ N = U;
if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
NodeSUnit->isCall = true;
break;
@@ -742,7 +741,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
/// Returns true if \p DV has any VReg operand locations which don't exist in
/// VRBaseMap.
auto HasUnknownVReg = [&VRBaseMap](SDDbgValue *DV) {
- for (SDDbgOperand L : DV->getLocationOps()) {
+ for (const SDDbgOperand &L : DV->getLocationOps()) {
if (L.getKind() == SDDbgOperand::SDNODE &&
VRBaseMap.count({L.getSDNode(), L.getResNo()}) == 0)
return true;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 2a98464425c4..008665d50233 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -175,7 +176,7 @@ bool ISD::isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly) {
if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {
APInt SplatVal;
- return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnesValue();
+ return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnes();
}
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -224,7 +225,7 @@ bool ISD::isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly) {
if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {
APInt SplatVal;
- return isConstantSplatVector(N, SplatVal) && SplatVal.isNullValue();
+ return isConstantSplatVector(N, SplatVal) && SplatVal.isZero();
}
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -412,6 +413,28 @@ bool ISD::isVPOpcode(unsigned Opcode) {
}
}
+bool ISD::isVPBinaryOp(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+#define PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \
+ case ISD::SDOPC: \
+ return true;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+}
+
+bool ISD::isVPReduction(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+#define PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \
+ case ISD::SDOPC: \
+ return true;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+}
+
/// The operand position of the vector mask.
Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {
switch (Opcode) {
@@ -683,6 +706,34 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
break;
}
+ case ISD::VP_LOAD: {
+ const VPLoadSDNode *ELD = cast<VPLoadSDNode>(N);
+ ID.AddInteger(ELD->getMemoryVT().getRawBits());
+ ID.AddInteger(ELD->getRawSubclassData());
+ ID.AddInteger(ELD->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::VP_STORE: {
+ const VPStoreSDNode *EST = cast<VPStoreSDNode>(N);
+ ID.AddInteger(EST->getMemoryVT().getRawBits());
+ ID.AddInteger(EST->getRawSubclassData());
+ ID.AddInteger(EST->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::VP_GATHER: {
+ const VPGatherSDNode *EG = cast<VPGatherSDNode>(N);
+ ID.AddInteger(EG->getMemoryVT().getRawBits());
+ ID.AddInteger(EG->getRawSubclassData());
+ ID.AddInteger(EG->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::VP_SCATTER: {
+ const VPScatterSDNode *ES = cast<VPScatterSDNode>(N);
+ ID.AddInteger(ES->getMemoryVT().getRawBits());
+ ID.AddInteger(ES->getRawSubclassData());
+ ID.AddInteger(ES->getPointerInfo().getAddrSpace());
+ break;
+ }
case ISD::MLOAD: {
const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
ID.AddInteger(MLD->getMemoryVT().getRawBits());
@@ -1319,10 +1370,7 @@ SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
- EVT EltVT = VT.getScalarType();
- SDValue NegOne =
- getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT);
- return getNode(ISD::XOR, DL, VT, Val, NegOne);
+ return getNode(ISD::XOR, DL, VT, Val, getAllOnesConstant(DL, VT));
}
SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) {
@@ -1901,7 +1949,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
if (SameNumElts)
return N1;
if (auto *C = dyn_cast<ConstantSDNode>(Splat))
- if (C->isNullValue())
+ if (C->isZero())
return N1;
}
@@ -2265,19 +2313,8 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
const APInt &C1 = N1C->getAPIntValue();
- switch (Cond) {
- default: llvm_unreachable("Unknown integer setcc!");
- case ISD::SETEQ: return getBoolConstant(C1 == C2, dl, VT, OpVT);
- case ISD::SETNE: return getBoolConstant(C1 != C2, dl, VT, OpVT);
- case ISD::SETULT: return getBoolConstant(C1.ult(C2), dl, VT, OpVT);
- case ISD::SETUGT: return getBoolConstant(C1.ugt(C2), dl, VT, OpVT);
- case ISD::SETULE: return getBoolConstant(C1.ule(C2), dl, VT, OpVT);
- case ISD::SETUGE: return getBoolConstant(C1.uge(C2), dl, VT, OpVT);
- case ISD::SETLT: return getBoolConstant(C1.slt(C2), dl, VT, OpVT);
- case ISD::SETGT: return getBoolConstant(C1.sgt(C2), dl, VT, OpVT);
- case ISD::SETLE: return getBoolConstant(C1.sle(C2), dl, VT, OpVT);
- case ISD::SETGE: return getBoolConstant(C1.sge(C2), dl, VT, OpVT);
- }
+ return getBoolConstant(ICmpInst::compare(C1, C2, getICmpCondCode(Cond)),
+ dl, VT, OpVT);
}
}
@@ -2380,7 +2417,7 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) {
return SDValue();
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return GetDemandedBits(V, DemandedBits, DemandedElts);
}
@@ -2475,7 +2512,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
switch (V.getOpcode()) {
case ISD::SPLAT_VECTOR:
UndefElts = V.getOperand(0).isUndef()
- ? APInt::getAllOnesValue(DemandedElts.getBitWidth())
+ ? APInt::getAllOnes(DemandedElts.getBitWidth())
: APInt(DemandedElts.getBitWidth(), 0);
return true;
case ISD::ADD:
@@ -2507,7 +2544,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
unsigned NumElts = VT.getVectorNumElements();
assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch");
- UndefElts = APInt::getNullValue(NumElts);
+ UndefElts = APInt::getZero(NumElts);
switch (V.getOpcode()) {
case ISD::BUILD_VECTOR: {
@@ -2576,7 +2613,7 @@ bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) {
// For now we don't support this with scalable vectors.
if (!VT.isScalableVector())
- DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
return isSplatValue(V, DemandedElts, UndefElts) &&
(AllowUndefs || !UndefElts);
}
@@ -2592,7 +2629,7 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
APInt DemandedElts;
if (!VT.isScalableVector())
- DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
if (isSplatValue(V, DemandedElts, UndefElts)) {
if (VT.isScalableVector()) {
@@ -2740,7 +2777,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const {
}
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return computeKnownBits(Op, DemandedElts, Depth);
}
@@ -2878,7 +2915,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
Known.One.setAllBits();
Known.Zero.setAllBits();
@@ -2965,11 +3002,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
// bits from the overlapping larger input elements and extracting the
// sub sections we actually care about.
unsigned SubScale = SubBitWidth / BitWidth;
- APInt SubDemandedElts(NumElts / SubScale, 0);
- for (unsigned i = 0; i != NumElts; ++i)
- if (DemandedElts[i])
- SubDemandedElts.setBit(i / SubScale);
-
+ APInt SubDemandedElts =
+ APIntOps::ScaleBitMask(DemandedElts, NumElts / SubScale);
Known2 = computeKnownBits(N0, SubDemandedElts, Depth + 1);
Known.Zero.setAllBits(); Known.One.setAllBits();
@@ -3415,7 +3449,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
// If we know the element index, just demand that vector element, else for
// an unknown element index, ignore DemandedElts and demand them all.
- APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts =
@@ -3647,6 +3681,12 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
}))
return true;
+ // Is the operand of a splat vector a constant power of two?
+ if (Val.getOpcode() == ISD::SPLAT_VECTOR)
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val->getOperand(0)))
+ if (C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2())
+ return true;
+
// More could be done here, though the above checks are enough
// to handle some common cases.
@@ -3663,7 +3703,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
return 1;
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return ComputeNumSignBits(Op, DemandedElts, Depth);
}
@@ -3771,10 +3811,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
assert(VT.isVector() && "Expected bitcast to vector");
unsigned Scale = SrcBits / VTBits;
- APInt SrcDemandedElts(NumElts / Scale, 0);
- for (unsigned i = 0; i != NumElts; ++i)
- if (DemandedElts[i])
- SrcDemandedElts.setBit(i / Scale);
+ APInt SrcDemandedElts =
+ APIntOps::ScaleBitMask(DemandedElts, NumElts / Scale);
// Fast case - sign splat can be simply split across the small elements.
Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1);
@@ -3946,13 +3984,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// Special case decrementing a value (ADD X, -1):
if (ConstantSDNode *CRHS =
isConstOrConstSplat(Op.getOperand(1), DemandedElts))
- if (CRHS->isAllOnesValue()) {
+ if (CRHS->isAllOnes()) {
KnownBits Known =
computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((Known.Zero | 1).isAllOnesValue())
+ if ((Known.Zero | 1).isAllOnes())
return VTBits;
// If we are subtracting one from a positive number, there is no carry
@@ -3971,12 +4009,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// Handle NEG.
if (ConstantSDNode *CLHS =
isConstOrConstSplat(Op.getOperand(0), DemandedElts))
- if (CLHS->isNullValue()) {
+ if (CLHS->isZero()) {
KnownBits Known =
computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((Known.Zero | 1).isAllOnesValue())
+ if ((Known.Zero | 1).isAllOnes())
return VTBits;
// If the input is known to be positive (the sign bit is known clear),
@@ -4080,7 +4118,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// If we know the element index, just demand that vector element, else for
// an unknown element index, ignore DemandedElts and demand them all.
- APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts =
@@ -4126,7 +4164,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
Tmp = std::numeric_limits<unsigned>::max();
if (!!DemandedSubElts) {
@@ -4248,6 +4286,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return std::max(FirstAnswer, Mask.countLeadingOnes());
}
+unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op, unsigned Depth) const {
+ unsigned SignBits = ComputeNumSignBits(Op, Depth);
+ return Op.getScalarValueSizeInBits() - SignBits + 1;
+}
+
+unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op,
+ const APInt &DemandedElts,
+ unsigned Depth) const {
+ unsigned SignBits = ComputeNumSignBits(Op, DemandedElts, Depth);
+ return Op.getScalarValueSizeInBits() - SignBits + 1;
+}
+
bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly,
unsigned Depth) const {
// Early out for FREEZE.
@@ -4260,7 +4310,7 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly,
return false;
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return isGuaranteedNotToBeUndefOrPoison(Op, DemandedElts, PoisonOnly, Depth);
}
@@ -4285,7 +4335,17 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
case ISD::UNDEF:
return PoisonOnly;
- // TODO: ISD::BUILD_VECTOR handling
+ case ISD::BUILD_VECTOR:
+ // NOTE: BUILD_VECTOR has implicit truncation of wider scalar elements -
+ // this shouldn't affect the result.
+ for (unsigned i = 0, e = Op.getNumOperands(); i < e; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ if (!isGuaranteedNotToBeUndefOrPoison(Op.getOperand(i), PoisonOnly,
+ Depth + 1))
+ return false;
+ }
+ return true;
// TODO: Search for noundef attributes from library functions.
@@ -4449,8 +4509,8 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
"Floating point types unsupported - use isKnownNeverZeroFloat");
// If the value is a constant, we can obviously see if it is a zero or not.
- if (ISD::matchUnaryPredicate(
- Op, [](ConstantSDNode *C) { return !C->isNullValue(); }))
+ if (ISD::matchUnaryPredicate(Op,
+ [](ConstantSDNode *C) { return !C->isZero(); }))
return true;
// TODO: Recognize more cases here.
@@ -4490,7 +4550,7 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
static SDValue FoldSTEP_VECTOR(const SDLoc &DL, EVT VT, SDValue Step,
SelectionDAG &DAG) {
- if (cast<ConstantSDNode>(Step)->isNullValue())
+ if (cast<ConstantSDNode>(Step)->isZero())
return DAG.getConstant(0, DL, VT);
return SDValue();
@@ -4676,7 +4736,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
APFloat apf(EVTToAPFloatSemantics(VT),
- APInt::getNullValue(VT.getSizeInBits()));
+ APInt::getZero(VT.getSizeInBits()));
(void)apf.convertFromAPInt(Val,
Opcode==ISD::SINT_TO_FP,
APFloat::rmNearestTiesToEven);
@@ -4828,7 +4888,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP: {
SDValue Ops = {Operand};
- if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
+ if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops))
return Fold;
}
}
@@ -4976,6 +5036,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
+ if (OpOpcode == ISD::VSCALE && !NewNodesMustHaveLegalTypes)
+ return getVScale(DL, VT, Operand.getConstantOperandAPInt(0));
break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
@@ -5206,173 +5268,111 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::CONCAT_VECTORS)
return SDValue();
- // For now, the array Ops should only contain two values.
- // This enforcement will be removed once this function is merged with
- // FoldConstantVectorArithmetic
- if (Ops.size() != 2)
+ unsigned NumOps = Ops.size();
+ if (NumOps == 0)
return SDValue();
if (isUndef(Opcode, Ops))
return getUNDEF(VT);
- SDNode *N1 = Ops[0].getNode();
- SDNode *N2 = Ops[1].getNode();
-
// Handle the case of two scalars.
- if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) {
- if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) {
- if (C1->isOpaque() || C2->isOpaque())
- return SDValue();
-
- Optional<APInt> FoldAttempt =
- FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());
- if (!FoldAttempt)
- return SDValue();
-
- SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT);
- assert((!Folded || !VT.isVector()) &&
- "Can't fold vectors ops with scalar operands");
- return Folded;
- }
- }
+ if (NumOps == 2) {
+ // TODO: Move foldConstantFPMath here?
- // fold (add Sym, c) -> Sym+c
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N1))
- return FoldSymbolOffset(Opcode, VT, GA, N2);
- if (TLI->isCommutativeBinOp(Opcode))
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2))
- return FoldSymbolOffset(Opcode, VT, GA, N1);
+ if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) {
+ if (auto *C2 = dyn_cast<ConstantSDNode>(Ops[1])) {
+ if (C1->isOpaque() || C2->isOpaque())
+ return SDValue();
- // For fixed width vectors, extract each constant element and fold them
- // individually. Either input may be an undef value.
- bool IsBVOrSV1 = N1->getOpcode() == ISD::BUILD_VECTOR ||
- N1->getOpcode() == ISD::SPLAT_VECTOR;
- if (!IsBVOrSV1 && !N1->isUndef())
- return SDValue();
- bool IsBVOrSV2 = N2->getOpcode() == ISD::BUILD_VECTOR ||
- N2->getOpcode() == ISD::SPLAT_VECTOR;
- if (!IsBVOrSV2 && !N2->isUndef())
- return SDValue();
- // If both operands are undef, that's handled the same way as scalars.
- if (!IsBVOrSV1 && !IsBVOrSV2)
- return SDValue();
+ Optional<APInt> FoldAttempt =
+ FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());
+ if (!FoldAttempt)
+ return SDValue();
- EVT SVT = VT.getScalarType();
- EVT LegalSVT = SVT;
- if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
- LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
- if (LegalSVT.bitsLT(SVT))
- return SDValue();
- }
-
- SmallVector<SDValue, 4> Outputs;
- unsigned NumOps = 0;
- if (IsBVOrSV1)
- NumOps = std::max(NumOps, N1->getNumOperands());
- if (IsBVOrSV2)
- NumOps = std::max(NumOps, N2->getNumOperands());
- assert(NumOps != 0 && "Expected non-zero operands");
- // Scalable vectors should only be SPLAT_VECTOR or UNDEF here. We only need
- // one iteration for that.
- assert((!VT.isScalableVector() || NumOps == 1) &&
- "Scalable vector should only have one scalar");
-
- for (unsigned I = 0; I != NumOps; ++I) {
- // We can have a fixed length SPLAT_VECTOR and a BUILD_VECTOR so we need
- // to use operand 0 of the SPLAT_VECTOR for each fixed element.
- SDValue V1;
- if (N1->getOpcode() == ISD::BUILD_VECTOR)
- V1 = N1->getOperand(I);
- else if (N1->getOpcode() == ISD::SPLAT_VECTOR)
- V1 = N1->getOperand(0);
- else
- V1 = getUNDEF(SVT);
-
- SDValue V2;
- if (N2->getOpcode() == ISD::BUILD_VECTOR)
- V2 = N2->getOperand(I);
- else if (N2->getOpcode() == ISD::SPLAT_VECTOR)
- V2 = N2->getOperand(0);
- else
- V2 = getUNDEF(SVT);
-
- if (SVT.isInteger()) {
- if (V1.getValueType().bitsGT(SVT))
- V1 = getNode(ISD::TRUNCATE, DL, SVT, V1);
- if (V2.getValueType().bitsGT(SVT))
- V2 = getNode(ISD::TRUNCATE, DL, SVT, V2);
+ SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT);
+ assert((!Folded || !VT.isVector()) &&
+ "Can't fold vectors ops with scalar operands");
+ return Folded;
+ }
}
- if (V1.getValueType() != SVT || V2.getValueType() != SVT)
- return SDValue();
-
- // Fold one vector element.
- SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2);
- if (LegalSVT != SVT)
- ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult);
-
- // Scalar folding only succeeded if the result is a constant or UNDEF.
- if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
- ScalarResult.getOpcode() != ISD::ConstantFP)
- return SDValue();
- Outputs.push_back(ScalarResult);
- }
-
- if (N1->getOpcode() == ISD::BUILD_VECTOR ||
- N2->getOpcode() == ISD::BUILD_VECTOR) {
- assert(VT.getVectorNumElements() == Outputs.size() &&
- "Vector size mismatch!");
-
- // Build a big vector out of the scalar elements we generated.
- return getBuildVector(VT, SDLoc(), Outputs);
+ // fold (add Sym, c) -> Sym+c
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[0]))
+ return FoldSymbolOffset(Opcode, VT, GA, Ops[1].getNode());
+ if (TLI->isCommutativeBinOp(Opcode))
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[1]))
+ return FoldSymbolOffset(Opcode, VT, GA, Ops[0].getNode());
}
- assert((N1->getOpcode() == ISD::SPLAT_VECTOR ||
- N2->getOpcode() == ISD::SPLAT_VECTOR) &&
- "One operand should be a splat vector");
-
- assert(Outputs.size() == 1 && "Vector size mismatch!");
- return getSplatVector(VT, SDLoc(), Outputs[0]);
-}
-
-// TODO: Merge with FoldConstantArithmetic
-SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
- const SDLoc &DL, EVT VT,
- ArrayRef<SDValue> Ops,
- const SDNodeFlags Flags) {
- // If the opcode is a target-specific ISD node, there's nothing we can
- // do here and the operand rules may not line up with the below, so
- // bail early.
- if (Opcode >= ISD::BUILTIN_OP_END)
- return SDValue();
-
- if (isUndef(Opcode, Ops))
- return getUNDEF(VT);
-
- // We can only fold vectors - maybe merge with FoldConstantArithmetic someday?
+ // This is for vector folding only from here on.
if (!VT.isVector())
return SDValue();
ElementCount NumElts = VT.getVectorElementCount();
+ // See if we can fold through bitcasted integer ops.
+ // TODO: Can we handle undef elements?
+ if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() &&
+ Ops[0].getValueType() == VT && Ops[1].getValueType() == VT &&
+ Ops[0].getOpcode() == ISD::BITCAST &&
+ Ops[1].getOpcode() == ISD::BITCAST) {
+ SDValue N1 = peekThroughBitcasts(Ops[0]);
+ SDValue N2 = peekThroughBitcasts(Ops[1]);
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+ auto *BV2 = dyn_cast<BuildVectorSDNode>(N2);
+ EVT BVVT = N1.getValueType();
+ if (BV1 && BV2 && BVVT.isInteger() && BVVT == N2.getValueType()) {
+ bool IsLE = getDataLayout().isLittleEndian();
+ unsigned EltBits = VT.getScalarSizeInBits();
+ SmallVector<APInt> RawBits1, RawBits2;
+ BitVector UndefElts1, UndefElts2;
+ if (BV1->getConstantRawBits(IsLE, EltBits, RawBits1, UndefElts1) &&
+ BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2) &&
+ UndefElts1.none() && UndefElts2.none()) {
+ SmallVector<APInt> RawBits;
+ for (unsigned I = 0, E = NumElts.getFixedValue(); I != E; ++I) {
+ Optional<APInt> Fold = FoldValue(Opcode, RawBits1[I], RawBits2[I]);
+ if (!Fold)
+ break;
+ RawBits.push_back(Fold.getValue());
+ }
+ if (RawBits.size() == NumElts.getFixedValue()) {
+ // We have constant folded, but we need to cast this again back to
+ // the original (possibly legalized) type.
+ SmallVector<APInt> DstBits;
+ BitVector DstUndefs;
+ BuildVectorSDNode::recastRawBits(IsLE, BVVT.getScalarSizeInBits(),
+ DstBits, RawBits, DstUndefs,
+ BitVector(RawBits.size(), false));
+ EVT BVEltVT = BV1->getOperand(0).getValueType();
+ unsigned BVEltBits = BVEltVT.getSizeInBits();
+ SmallVector<SDValue> Ops(DstBits.size(), getUNDEF(BVEltVT));
+ for (unsigned I = 0, E = DstBits.size(); I != E; ++I) {
+ if (DstUndefs[I])
+ continue;
+ Ops[I] = getConstant(DstBits[I].sextOrSelf(BVEltBits), DL, BVEltVT);
+ }
+ return getBitcast(VT, getBuildVector(BVVT, DL, Ops));
+ }
+ }
+ }
+ }
+
auto IsScalarOrSameVectorSize = [NumElts](const SDValue &Op) {
return !Op.getValueType().isVector() ||
Op.getValueType().getVectorElementCount() == NumElts;
};
- auto IsConstantBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {
- APInt SplatVal;
- BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op);
+ auto IsBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {
return Op.isUndef() || Op.getOpcode() == ISD::CONDCODE ||
- (BV && BV->isConstant()) ||
- (Op.getOpcode() == ISD::SPLAT_VECTOR &&
- ISD::isConstantSplatVector(Op.getNode(), SplatVal));
+ Op.getOpcode() == ISD::BUILD_VECTOR ||
+ Op.getOpcode() == ISD::SPLAT_VECTOR;
};
// All operands must be vector types with the same number of elements as
- // the result type and must be either UNDEF or a build vector of constant
+ // the result type and must be either UNDEF or a build/splat vector
// or UNDEF scalars.
- if (!llvm::all_of(Ops, IsConstantBuildVectorSplatVectorOrUndef) ||
+ if (!llvm::all_of(Ops, IsBuildVectorSplatVectorOrUndef) ||
!llvm::all_of(Ops, IsScalarOrSameVectorSize))
return SDValue();
@@ -5392,17 +5392,16 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
// For scalable vector types we know we're dealing with SPLAT_VECTORs. We
// only have one operand to check. For fixed-length vector types we may have
// a combination of BUILD_VECTOR and SPLAT_VECTOR.
- unsigned NumOperands = NumElts.isScalable() ? 1 : NumElts.getFixedValue();
+ unsigned NumVectorElts = NumElts.isScalable() ? 1 : NumElts.getFixedValue();
// Constant fold each scalar lane separately.
SmallVector<SDValue, 4> ScalarResults;
- for (unsigned I = 0; I != NumOperands; I++) {
+ for (unsigned I = 0; I != NumVectorElts; I++) {
SmallVector<SDValue, 4> ScalarOps;
for (SDValue Op : Ops) {
EVT InSVT = Op.getValueType().getScalarType();
if (Op.getOpcode() != ISD::BUILD_VECTOR &&
Op.getOpcode() != ISD::SPLAT_VECTOR) {
- // We've checked that this is UNDEF or a constant of some kind.
if (Op.isUndef())
ScalarOps.push_back(getUNDEF(InSVT));
else
@@ -5423,7 +5422,7 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
}
// Constant fold the scalar operands.
- SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags);
+ SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps);
// Legalize the (integer) scalar constant if necessary.
if (LegalSVT != SVT)
@@ -5591,9 +5590,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
N1.getValueType() == VT && "Binary operator types must match!");
// (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
// worth handling here.
- if (N2C && N2C->isNullValue())
+ if (N2C && N2C->isZero())
return N2;
- if (N2C && N2C->isAllOnesValue()) // X & -1 -> X
+ if (N2C && N2C->isAllOnes()) // X & -1 -> X
return N1;
break;
case ISD::OR:
@@ -5605,7 +5604,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
N1.getValueType() == VT && "Binary operator types must match!");
// (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
// it's worth handling here.
- if (N2C && N2C->isNullValue())
+ if (N2C && N2C->isZero())
return N1;
if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.isVector() &&
VT.getVectorElementType() == MVT::i1)
@@ -5711,7 +5710,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// size of the value, the shift/rotate count is guaranteed to be zero.
if (VT == MVT::i1)
return N1;
- if (N2C && N2C->isNullValue())
+ if (N2C && N2C->isZero())
return N1;
break;
case ISD::FP_ROUND:
@@ -6086,7 +6085,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return V;
// Vector constant folding.
SDValue Ops[] = {N1, N2, N3};
- if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) {
+ if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, Ops)) {
NewSDValueDbgMsg(V, "New node vector constant folding: ", this);
return V;
}
@@ -6099,6 +6098,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
+ case ISD::VECTOR_SPLICE: {
+ if (cast<ConstantSDNode>(N3)->isZero())
+ return N1;
+ break;
+ }
case ISD::INSERT_VECTOR_ELT: {
ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3);
// INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except
@@ -6214,9 +6218,8 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
ArgChains.push_back(Chain);
// Add a chain value for each stack argument.
- for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(),
- UE = getEntryNode().getNode()->use_end(); U != UE; ++U)
- if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
+ for (SDNode *U : getEntryNode().getNode()->uses())
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(U))
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
if (FI->getIndex() < 0)
ArgChains.push_back(SDValue(L, 1));
@@ -6720,7 +6723,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
bool IsZeroVal =
- isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
+ isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero();
if (!TLI.findOptimalMemOpLowering(
MemOps, TLI.getMaxStoresPerMemset(OptSize),
MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol),
@@ -6809,7 +6812,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (ConstantSize) {
// Memcpy with size zero? Just return the original chain.
- if (ConstantSize->isNullValue())
+ if (ConstantSize->isZero())
return Chain;
SDValue Result = getMemcpyLoadsAndStores(
@@ -6924,7 +6927,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (ConstantSize) {
// Memmove with size zero? Just return the original chain.
- if (ConstantSize->isNullValue())
+ if (ConstantSize->isZero())
return Chain;
SDValue Result = getMemmoveLoadsAndStores(
@@ -7026,7 +7029,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (ConstantSize) {
// Memset with size zero? Just return the original chain.
- if (ConstantSize->isNullValue())
+ if (ConstantSize->isZero())
return Chain;
SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src,
@@ -7618,6 +7621,374 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
return V;
}
+SDValue SelectionDAG::getLoadVP(
+ ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl,
+ SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment,
+ MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+ const MDNode *Ranges, bool IsExpanding) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOLoad;
+ assert((MMOFlags & MachineMemOperand::MOStore) == 0);
+ // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+ // clients.
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
+
+ uint64_t Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize());
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
+ Alignment, AAInfo, Ranges);
+ return getLoadVP(AM, ExtType, VT, dl, Chain, Ptr, Offset, Mask, EVL, MemVT,
+ MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM,
+ ISD::LoadExtType ExtType, EVT VT,
+ const SDLoc &dl, SDValue Chain, SDValue Ptr,
+ SDValue Offset, SDValue Mask, SDValue EVL,
+ EVT MemVT, MachineMemOperand *MMO,
+ bool IsExpanding) {
+ if (VT == MemVT) {
+ ExtType = ISD::NON_EXTLOAD;
+ } else if (ExtType == ISD::NON_EXTLOAD) {
+ assert(VT == MemVT && "Non-extending load from different memory type!");
+ } else {
+ // Extending load.
+ assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be an extending load, not truncating!");
+ assert(VT.isInteger() == MemVT.isInteger() &&
+ "Cannot convert from FP to Int or Int -> FP!");
+ assert(VT.isVector() == MemVT.isVector() &&
+ "Cannot use an ext load to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorElementCount() == MemVT.getVectorElementCount()) &&
+ "Cannot use an ext load to change the number of vector elements!");
+ }
+
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");
+
+ SDVTList VTs = Indexed ? getVTList(VT, Ptr.getValueType(), MVT::Other)
+ : getVTList(VT, MVT::Other);
+ SDValue Ops[] = {Chain, Ptr, Offset, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_LOAD, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPLoadSDNode>(
+ dl.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPLoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<VPLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+ ExtType, IsExpanding, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo,
+ MaybeAlign Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, const MDNode *Ranges,
+ bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ Mask, EVL, PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges,
+ IsExpanding);
+}
+
+SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachineMemOperand *MMO, bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ Mask, EVL, VT, MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl,
+ EVT VT, SDValue Chain, SDValue Ptr,
+ SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ MaybeAlign Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask,
+ EVL, PtrInfo, MemVT, Alignment, MMOFlags, AAInfo, nullptr,
+ IsExpanding);
+}
+
+SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl,
+ EVT VT, SDValue Chain, SDValue Ptr,
+ SDValue Mask, SDValue EVL, EVT MemVT,
+ MachineMemOperand *MMO, bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask,
+ EVL, MemVT, MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getIndexedLoadVP(SDValue OrigLoad, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ auto *LD = cast<VPLoadSDNode>(OrigLoad);
+ assert(LD->getOffset().isUndef() && "Load is already an indexed load!");
+ // Don't propagate the invariant or dereferenceable flags.
+ auto MMOFlags =
+ LD->getMemOperand()->getFlags() &
+ ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
+ return getLoadVP(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
+ LD->getChain(), Base, Offset, LD->getMask(),
+ LD->getVectorLength(), LD->getPointerInfo(),
+ LD->getMemoryVT(), LD->getAlign(), MMOFlags, LD->getAAInfo(),
+ nullptr, LD->isExpandingLoad());
+}
+
+SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ uint64_t Size =
+ MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize());
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
+ return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing);
+}
+
+SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachineMemOperand *MMO, bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ EVT VT = Val.getValueType();
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>(
+ dl.getIROrder(), VTs, ISD::UNINDEXED, false, IsCompressing, VT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N =
+ newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ ISD::UNINDEXED, false, IsCompressing, VT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl,
+ SDValue Val, SDValue Ptr, SDValue Mask,
+ SDValue EVL, MachinePointerInfo PtrInfo,
+ EVT SVT, Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo,
+ bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()),
+ Alignment, AAInfo);
+ return getTruncStoreVP(Chain, dl, Val, Ptr, Mask, EVL, SVT, MMO,
+ IsCompressing);
+}
+
+SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl,
+ SDValue Val, SDValue Ptr, SDValue Mask,
+ SDValue EVL, EVT SVT,
+ MachineMemOperand *MMO,
+ bool IsCompressing) {
+ EVT VT = Val.getValueType();
+
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ if (VT == SVT)
+ return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing);
+
+ assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be a truncating store, not extending!");
+ assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!");
+ assert(VT.isVector() == SVT.isVector() &&
+ "Cannot use trunc store to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorElementCount() == SVT.getVectorElementCount()) &&
+ "Cannot use trunc store to change the number of vector elements!");
+
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);
+ ID.AddInteger(SVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>(
+ dl.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N =
+ newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ ISD::UNINDEXED, true, IsCompressing, SVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ auto *ST = cast<VPStoreSDNode>(OrigStore);
+ assert(ST->getOffset().isUndef() && "Store is already an indexed store!");
+ SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+ SDValue Ops[] = {ST->getChain(), ST->getValue(), Base,
+ Offset, ST->getMask(), ST->getVectorLength()};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<VPStoreSDNode>(
+ dl.getIROrder(), dl.getDebugLoc(), VTs, AM, ST->isTruncatingStore(),
+ ST->isCompressingStore(), ST->getMemoryVT(), ST->getMemOperand());
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl,
+ ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType) {
+ assert(Ops.size() == 6 && "Incompatible number of operands");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_GATHER, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPGatherSDNode>(
+ dl.getIROrder(), VTs, VT, MMO, IndexType));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPGatherSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ auto *N = newSDNode<VPGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ VT, MMO, IndexType);
+ createOperands(N, Ops);
+
+ assert(N->getMask().getValueType().getVectorElementCount() ==
+ N->getValueType(0).getVectorElementCount() &&
+ "Vector width mismatch between mask and data");
+ assert(N->getIndex().getValueType().getVectorElementCount().isScalable() ==
+ N->getValueType(0).getVectorElementCount().isScalable() &&
+ "Scalable flags of index and data do not match");
+ assert(ElementCount::isKnownGE(
+ N->getIndex().getValueType().getVectorElementCount(),
+ N->getValueType(0).getVectorElementCount()) &&
+ "Vector width mismatch between index and data");
+ assert(isa<ConstantSDNode>(N->getScale()) &&
+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ "Scale should be a constant power of 2");
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl,
+ ArrayRef<SDValue> Ops,
+ MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType) {
+ assert(Ops.size() == 7 && "Incompatible number of operands");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_SCATTER, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPScatterSDNode>(
+ dl.getIROrder(), VTs, VT, MMO, IndexType));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPScatterSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<VPScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ VT, MMO, IndexType);
+ createOperands(N, Ops);
+
+ assert(N->getMask().getValueType().getVectorElementCount() ==
+ N->getValue().getValueType().getVectorElementCount() &&
+ "Vector width mismatch between mask and data");
+ assert(
+ N->getIndex().getValueType().getVectorElementCount().isScalable() ==
+ N->getValue().getValueType().getVectorElementCount().isScalable() &&
+ "Scalable flags of index and data do not match");
+ assert(ElementCount::isKnownGE(
+ N->getIndex().getValueType().getVectorElementCount(),
+ N->getValue().getValueType().getVectorElementCount()) &&
+ "Vector width mismatch between index and data");
+ assert(isa<ConstantSDNode>(N->getScale()) &&
+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ "Scale should be a constant power of 2");
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Base, SDValue Offset, SDValue Mask,
SDValue PassThru, EVT MemVT,
@@ -7818,7 +8189,7 @@ SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) {
// select true, T, F --> T
// select false, T, F --> F
if (auto *CondC = dyn_cast<ConstantSDNode>(Cond))
- return CondC->isNullValue() ? F : T;
+ return CondC->isZero() ? F : T;
// TODO: This should simplify VSELECT with constant condition using something
// like this (but check boolean contents to be complete?):
@@ -9296,7 +9667,7 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) {
}
#ifndef NDEBUG
-void SelectionDAG::VerifyDAGDiverence() {
+void SelectionDAG::VerifyDAGDivergence() {
std::vector<SDNode *> TopoOrder;
CreateTopologicalOrder(TopoOrder);
for (auto *N : TopoOrder) {
@@ -9384,21 +9755,20 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
// before SortedPos will contain the topological sort index, and the
// Node Id fields for nodes At SortedPos and after will contain the
// count of outstanding operands.
- for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
- SDNode *N = &*I++;
- checkForCycles(N, this);
- unsigned Degree = N->getNumOperands();
+ for (SDNode &N : llvm::make_early_inc_range(allnodes())) {
+ checkForCycles(&N, this);
+ unsigned Degree = N.getNumOperands();
if (Degree == 0) {
// A node with no uses, add it to the result array immediately.
- N->setNodeId(DAGSize++);
- allnodes_iterator Q(N);
+ N.setNodeId(DAGSize++);
+ allnodes_iterator Q(&N);
if (Q != SortedPos)
SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
assert(SortedPos != AllNodes.end() && "Overran node list");
++SortedPos;
} else {
// Temporarily use the Node Id as scratch space for the degree count.
- N->setNodeId(Degree);
+ N.setNodeId(Degree);
}
}
@@ -9512,12 +9882,9 @@ SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,
std::string ErrorStr;
raw_string_ostream ErrorFormatter(ErrorStr);
-
ErrorFormatter << "Undefined external symbol ";
ErrorFormatter << '"' << Symbol << '"';
- ErrorFormatter.flush();
-
- report_fatal_error(ErrorStr);
+ report_fatal_error(Twine(ErrorFormatter.str()));
}
//===----------------------------------------------------------------------===//
@@ -9526,7 +9893,7 @@ SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,
bool llvm::isNullConstant(SDValue V) {
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isNullValue();
+ return Const != nullptr && Const->isZero();
}
bool llvm::isNullFPConstant(SDValue V) {
@@ -9536,7 +9903,7 @@ bool llvm::isNullFPConstant(SDValue V) {
bool llvm::isAllOnesConstant(SDValue V) {
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isAllOnesValue();
+ return Const != nullptr && Const->isAllOnes();
}
bool llvm::isOneConstant(SDValue V) {
@@ -9670,7 +10037,7 @@ bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) {
// TODO: may want to use peekThroughBitcast() here.
ConstantSDNode *C =
isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation=*/true);
- return C && C->isNullValue();
+ return C && C->isZero();
}
bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) {
@@ -9684,7 +10051,7 @@ bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) {
N = peekThroughBitcasts(N);
unsigned BitWidth = N.getScalarValueSizeInBits();
ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs);
- return C && C->isAllOnesValue() && C->getValueSizeInBits(0) == BitWidth;
+ return C && C->isAllOnes() && C->getValueSizeInBits(0) == BitWidth;
}
HandleSDNode::~HandleSDNode() {
@@ -9790,8 +10157,7 @@ bool SDNode::hasAnyUseOfValue(unsigned Value) const {
/// isOnlyUserOf - Return true if this node is the only use of N.
bool SDNode::isOnlyUserOf(const SDNode *N) const {
bool Seen = false;
- for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
- SDNode *User = *I;
+ for (const SDNode *User : N->uses()) {
if (User == this)
Seen = true;
else
@@ -9804,8 +10170,7 @@ bool SDNode::isOnlyUserOf(const SDNode *N) const {
/// Return true if the only users of N are contained in Nodes.
bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
bool Seen = false;
- for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
- SDNode *User = *I;
+ for (const SDNode *User : N->uses()) {
if (llvm::is_contained(Nodes, User))
Seen = true;
else
@@ -10212,14 +10577,14 @@ SelectionDAG::GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT,
"Mixing fixed width and scalable vectors when enveloping a type");
EVT LoVT, HiVT;
if (VTNumElts.getKnownMinValue() > EnvNumElts.getKnownMinValue()) {
- LoVT = EnvVT;
+ LoVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts);
HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts);
*HiIsEmpty = false;
} else {
// Flag that hi type has zero storage size, but return split envelop type
// (this would be easier if vector types with zero elements were allowed).
LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts);
- HiVT = EnvVT;
+ HiVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts);
*HiIsEmpty = true;
}
return std::make_pair(LoVT, HiVT);
@@ -10387,7 +10752,7 @@ SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts,
}
SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
- APInt DemandedElts = APInt::getAllOnesValue(getNumOperands());
+ APInt DemandedElts = APInt::getAllOnes(getNumOperands());
return getSplatValue(DemandedElts, UndefElements);
}
@@ -10439,7 +10804,7 @@ bool BuildVectorSDNode::getRepeatedSequence(const APInt &DemandedElts,
bool BuildVectorSDNode::getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,
BitVector *UndefElements) const {
- APInt DemandedElts = APInt::getAllOnesValue(getNumOperands());
+ APInt DemandedElts = APInt::getAllOnes(getNumOperands());
return getRepeatedSequence(DemandedElts, Sequence, UndefElements);
}
@@ -10485,6 +10850,97 @@ BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
return -1;
}
+bool BuildVectorSDNode::getConstantRawBits(
+ bool IsLittleEndian, unsigned DstEltSizeInBits,
+ SmallVectorImpl<APInt> &RawBitElements, BitVector &UndefElements) const {
+ // Early-out if this contains anything but Undef/Constant/ConstantFP.
+ if (!isConstant())
+ return false;
+
+ unsigned NumSrcOps = getNumOperands();
+ unsigned SrcEltSizeInBits = getValueType(0).getScalarSizeInBits();
+ assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 &&
+ "Invalid bitcast scale");
+
+ // Extract raw src bits.
+ SmallVector<APInt> SrcBitElements(NumSrcOps,
+ APInt::getNullValue(SrcEltSizeInBits));
+ BitVector SrcUndeElements(NumSrcOps, false);
+
+ for (unsigned I = 0; I != NumSrcOps; ++I) {
+ SDValue Op = getOperand(I);
+ if (Op.isUndef()) {
+ SrcUndeElements.set(I);
+ continue;
+ }
+ auto *CInt = dyn_cast<ConstantSDNode>(Op);
+ auto *CFP = dyn_cast<ConstantFPSDNode>(Op);
+ assert((CInt || CFP) && "Unknown constant");
+ SrcBitElements[I] =
+ CInt ? CInt->getAPIntValue().truncOrSelf(SrcEltSizeInBits)
+ : CFP->getValueAPF().bitcastToAPInt();
+ }
+
+ // Recast to dst width.
+ recastRawBits(IsLittleEndian, DstEltSizeInBits, RawBitElements,
+ SrcBitElements, UndefElements, SrcUndeElements);
+ return true;
+}
+
+void BuildVectorSDNode::recastRawBits(bool IsLittleEndian,
+ unsigned DstEltSizeInBits,
+ SmallVectorImpl<APInt> &DstBitElements,
+ ArrayRef<APInt> SrcBitElements,
+ BitVector &DstUndefElements,
+ const BitVector &SrcUndefElements) {
+ unsigned NumSrcOps = SrcBitElements.size();
+ unsigned SrcEltSizeInBits = SrcBitElements[0].getBitWidth();
+ assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 &&
+ "Invalid bitcast scale");
+ assert(NumSrcOps == SrcUndefElements.size() &&
+ "Vector size mismatch");
+
+ unsigned NumDstOps = (NumSrcOps * SrcEltSizeInBits) / DstEltSizeInBits;
+ DstUndefElements.clear();
+ DstUndefElements.resize(NumDstOps, false);
+ DstBitElements.assign(NumDstOps, APInt::getNullValue(DstEltSizeInBits));
+
+ // Concatenate src elements constant bits together into dst element.
+ if (SrcEltSizeInBits <= DstEltSizeInBits) {
+ unsigned Scale = DstEltSizeInBits / SrcEltSizeInBits;
+ for (unsigned I = 0; I != NumDstOps; ++I) {
+ DstUndefElements.set(I);
+ APInt &DstBits = DstBitElements[I];
+ for (unsigned J = 0; J != Scale; ++J) {
+ unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1));
+ if (SrcUndefElements[Idx])
+ continue;
+ DstUndefElements.reset(I);
+ const APInt &SrcBits = SrcBitElements[Idx];
+ assert(SrcBits.getBitWidth() == SrcEltSizeInBits &&
+ "Illegal constant bitwidths");
+ DstBits.insertBits(SrcBits, J * SrcEltSizeInBits);
+ }
+ }
+ return;
+ }
+
+ // Split src element constant bits into dst elements.
+ unsigned Scale = SrcEltSizeInBits / DstEltSizeInBits;
+ for (unsigned I = 0; I != NumSrcOps; ++I) {
+ if (SrcUndefElements[I]) {
+ DstUndefElements.set(I * Scale, (I + 1) * Scale);
+ continue;
+ }
+ const APInt &SrcBits = SrcBitElements[I];
+ for (unsigned J = 0; J != Scale; ++J) {
+ unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1));
+ APInt &DstBits = DstBitElements[Idx];
+ DstBits = SrcBits.extractBits(DstEltSizeInBits, J * DstEltSizeInBits);
+ }
+ }
+}
+
bool BuildVectorSDNode::isConstant() const {
for (const SDValue &Op : op_values()) {
unsigned Opc = Op.getOpcode();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 20c7d771bfb6..6d8252046501 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/GlobalAlias.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include <cstdint>
@@ -143,13 +144,27 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
- // If of mismatched base types or checkable indices we can check
- // they do not alias.
- if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
- (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
- (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) {
- IsAlias = false;
- return true;
+ if ((IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) {
+ // We can derive NoAlias In case of mismatched base types.
+ if (IsFI0 != IsFI1 || IsGV0 != IsGV1 || IsCV0 != IsCV1) {
+ IsAlias = false;
+ return true;
+ }
+ if (IsGV0 && IsGV1) {
+ auto *GV0 = cast<GlobalAddressSDNode>(BasePtr0.getBase())->getGlobal();
+ auto *GV1 = cast<GlobalAddressSDNode>(BasePtr1.getBase())->getGlobal();
+ // It doesn't make sense to access one global value using another globals
+ // values address, so we can assume that there is no aliasing in case of
+ // two different globals (unless we have symbols that may indirectly point
+ // to each other).
+ // FIXME: This is perhaps a bit too defensive. We could try to follow the
+ // chain with aliasee information for GlobalAlias variables to find out if
+ // we indirect symbols may alias or not.
+ if (GV0 != GV1 && !isa<GlobalAlias>(GV0) && !isa<GlobalAlias>(GV1)) {
+ IsAlias = false;
+ return true;
+ }
+ }
}
return false; // Cannot determine whether the pointers alias.
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index d56d4bcc9169..5d911c165293 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -69,6 +69,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
@@ -399,29 +400,31 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
return Val;
if (PartEVT.isVector()) {
+ // Vector/Vector bitcast.
+ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
// If the element type of the source/dest vectors are the same, but the
// parts vector has more elements than the value vector, then we have a
// vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
// elements we want.
- if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+ if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) {
assert((PartEVT.getVectorElementCount().getKnownMinValue() >
ValueVT.getVectorElementCount().getKnownMinValue()) &&
(PartEVT.getVectorElementCount().isScalable() ==
ValueVT.getVectorElementCount().isScalable()) &&
"Cannot narrow, it would be a lossy transformation");
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
- DAG.getVectorIdxConstant(0, DL));
+ PartEVT =
+ EVT::getVectorVT(*DAG.getContext(), PartEVT.getVectorElementType(),
+ ValueVT.getVectorElementCount());
+ Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, PartEVT, Val,
+ DAG.getVectorIdxConstant(0, DL));
+ if (PartEVT == ValueVT)
+ return Val;
}
- // Vector/Vector bitcast.
- if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
- return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
-
- assert(PartEVT.getVectorElementCount() == ValueVT.getVectorElementCount() &&
- "Cannot handle this kind of promotion");
// Promoted vector extract
return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
-
}
// Trivial bitcast if the types are the same size and the destination
@@ -670,6 +673,17 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// Promoted vector extract
Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+ } else if (PartEVT.isVector() &&
+ PartEVT.getVectorElementType() !=
+ ValueVT.getVectorElementType() &&
+ TLI.getTypeAction(*DAG.getContext(), ValueVT) ==
+ TargetLowering::TypeWidenVector) {
+ // Combination of widening and promotion.
+ EVT WidenVT =
+ EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(),
+ PartVT.getVectorElementCount());
+ SDValue Widened = widenVectorToPartType(DAG, Val, DL, WidenVT);
+ Val = DAG.getAnyExtOrTrunc(Widened, DL, PartVT);
} else {
if (ValueVT.getVectorElementCount().isScalar()) {
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
@@ -726,15 +740,19 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
} else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) {
// Bitconvert vector->vector case.
Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
- } else if (SDValue Widened =
- widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) {
- Val = Widened;
- } else if (BuiltVectorTy.getVectorElementType().bitsGE(
- ValueVT.getVectorElementType()) &&
- BuiltVectorTy.getVectorElementCount() ==
- ValueVT.getVectorElementCount()) {
- // Promoted vector extract
- Val = DAG.getAnyExtOrTrunc(Val, DL, BuiltVectorTy);
+ } else {
+ if (BuiltVectorTy.getVectorElementType().bitsGT(
+ ValueVT.getVectorElementType())) {
+ // Integer promotion.
+ ValueVT = EVT::getVectorVT(*DAG.getContext(),
+ BuiltVectorTy.getVectorElementType(),
+ ValueVT.getVectorElementCount());
+ Val = DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
+ }
+
+ if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) {
+ Val = Widened;
+ }
}
assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type");
@@ -1275,21 +1293,23 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
while (isa<Instruction>(V)) {
Instruction &VAsInst = *cast<Instruction>(V);
// Temporary "0", awaiting real implementation.
+ SmallVector<uint64_t, 16> Ops;
SmallVector<Value *, 4> AdditionalValues;
- DIExpression *SalvagedExpr =
- salvageDebugInfoImpl(VAsInst, Expr, StackValue, 0, AdditionalValues);
-
+ V = salvageDebugInfoImpl(VAsInst, Expr->getNumLocationOperands(), Ops,
+ AdditionalValues);
// If we cannot salvage any further, and haven't yet found a suitable debug
// expression, bail out.
+ if (!V)
+ break;
+
// TODO: If AdditionalValues isn't empty, then the salvage can only be
// represented with a DBG_VALUE_LIST, so we give up. When we have support
// here for variadic dbg_values, remove that condition.
- if (!SalvagedExpr || !AdditionalValues.empty())
+ if (!AdditionalValues.empty())
break;
// New value and expr now represent this debuginfo.
- V = VAsInst.getOperand(0);
- Expr = SalvagedExpr;
+ Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, StackValue);
// Some kind of simplification occurred: check whether the operand of the
// salvaged debug expression can be encoded in this DAG.
@@ -1400,7 +1420,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
BitsToDescribe = *VarSize;
if (auto Fragment = Expr->getFragmentInfo())
BitsToDescribe = Fragment->SizeInBits;
- for (auto RegAndSize : RFV.getRegsAndSizes()) {
+ for (const auto &RegAndSize : RFV.getRegsAndSizes()) {
// Bail out if all bits are described already.
if (Offset >= BitsToDescribe)
break;
@@ -1945,16 +1965,13 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
/*IsVarArg*/ false, DL);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
- if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
- Attribute::SExt))
+ if (F->getAttributes().hasRetAttr(Attribute::SExt))
ExtendKind = ISD::SIGN_EXTEND;
- else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
- Attribute::ZExt))
+ else if (F->getAttributes().hasRetAttr(Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
LLVMContext &Context = F->getContext();
- bool RetInReg = F->getAttributes().hasAttribute(
- AttributeList::ReturnIndex, Attribute::InReg);
+ bool RetInReg = F->getAttributes().hasRetAttr(Attribute::InReg);
for (unsigned j = 0; j != NumValues; ++j) {
EVT VT = ValueVTs[j];
@@ -1995,7 +2012,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
Flags.setZExt();
for (unsigned i = 0; i < NumParts; ++i) {
- Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
+ Outs.push_back(ISD::OutputArg(Flags,
+ Parts[i].getValueType().getSimpleVT(),
VT, /*isfixed=*/true, 0, 0));
OutVals.push_back(Parts[i]);
}
@@ -2012,10 +2030,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
assert(SwiftError.getFunctionArg() && "Need a swift error argument");
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
Flags.setSwiftError();
- Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
- EVT(TLI.getPointerTy(DL)) /*argvt*/,
- true /*isfixed*/, 1 /*origidx*/,
- 0 /*partOffs*/));
+ Outs.push_back(ISD::OutputArg(
+ Flags, /*vt=*/TLI.getPointerTy(DL), /*argvt=*/EVT(TLI.getPointerTy(DL)),
+ /*isfixed=*/true, /*origidx=*/1, /*partOffs=*/0));
// Create SDNode for the swifterror virtual register.
OutVals.push_back(
DAG.getRegister(SwiftError.getOrCreateVRegUseAt(
@@ -2566,7 +2583,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
JumpTableReg, SwitchOp);
JT.Reg = JumpTableReg;
- if (!JTH.OmitRangeCheck) {
+ if (!JTH.FallthroughUnreachable) {
// Emit the range check for the jump table, and branch to the default block
// for the switch statement if the value being switched on exceeds the
// largest case in the switch.
@@ -2663,7 +2680,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
TargetLowering::ArgListEntry Entry;
Entry.Node = GuardVal;
Entry.Ty = FnTy->getParamType(0);
- if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg))
+ if (GuardCheckFn->hasParamAttribute(0, Attribute::AttrKind::InReg))
Entry.IsInReg = true;
Args.push_back(Entry);
@@ -2778,13 +2795,13 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
- if (!B.OmitRangeCheck)
+ if (!B.FallthroughUnreachable)
addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
addSuccessorWithProb(SwitchBB, MBB, B.Prob);
SwitchBB->normalizeSuccProbs();
SDValue Root = CopyTo;
- if (!B.OmitRangeCheck) {
+ if (!B.FallthroughUnreachable) {
// Conditional branch to the default block.
SDValue RangeCmp = DAG.getSetCC(dl,
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
@@ -3140,7 +3157,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
// count type has enough bits to represent any shift value, truncate
// it now. This is a common case and it exposes the truncate to
// optimization early.
- else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits()))
+ else if (ShiftSize >= Log2_32_Ceil(Op1.getValueSizeInBits()))
Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
// Otherwise we'll need to temporarily settle for some other convenient
// type. Type legalization will make adjustments once the shiftee is split.
@@ -4057,8 +4074,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Type *Ty = I.getType();
Align Alignment = I.getAlign();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
+ AAMDNodes AAInfo = I.getAAMetadata();
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
SmallVector<EVT, 4> ValueVTs, MemVTs;
@@ -4185,13 +4201,11 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
const Value *SV = I.getOperand(0);
Type *Ty = I.getType();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
assert(
(!AA ||
!AA->pointsToConstantMemory(MemoryLocation(
SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
- AAInfo))) &&
+ I.getAAMetadata()))) &&
"load_from_swift_error should not be constant memory");
SmallVector<EVT, 4> ValueVTs;
@@ -4249,8 +4263,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
SDLoc dl = getCurSDLoc();
Align Alignment = I.getAlign();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
+ AAMDNodes AAInfo = I.getAAMetadata();
auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
@@ -4321,14 +4334,11 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
-
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
- VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo);
+ VT.getStoreSize().getKnownMinSize(), *Alignment, I.getAAMetadata());
SDValue StoreNode =
DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
ISD::UNINDEXED, false /* Truncating */, IsCompressing);
@@ -4358,7 +4368,7 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
- assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type");
+ assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
// Handle splat constant pointer.
if (auto *C = dyn_cast<Constant>(Ptr)) {
@@ -4412,9 +4422,6 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
.getValueOr(DAG.getEVTAlign(VT.getScalarType()));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
-
SDValue Base;
SDValue Index;
ISD::MemIndexType IndexType;
@@ -4427,7 +4434,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
MachinePointerInfo(AS), MachineMemOperand::MOStore,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
- MemoryLocation::UnknownSize, Alignment, AAInfo);
+ MemoryLocation::UnknownSize, Alignment, I.getAAMetadata());
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
@@ -4485,8 +4492,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
+ AAMDNodes AAInfo = I.getAAMetadata();
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
// Do not serialize masked loads of constant memory with anything.
@@ -4529,8 +4535,6 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
->getMaybeAlignValue()
.getValueOr(DAG.getEVTAlign(VT.getScalarType()));
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
SDValue Root = DAG.getRoot();
@@ -4545,7 +4549,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
MachinePointerInfo(AS), MachineMemOperand::MOLoad,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
- MemoryLocation::UnknownSize, Alignment, AAInfo, Ranges);
+ MemoryLocation::UnknownSize, Alignment, I.getAAMetadata(), Ranges);
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
@@ -4786,7 +4790,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
TLI.getPointerTy(DAG.getDataLayout())));
// Add all operands of the call to the operand list.
- for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ for (unsigned i = 0, e = I.arg_size(); i != e; ++i) {
const Value *Arg = I.getArgOperand(i);
if (!I.paramHasAttr(i, Attribute::ImmArg)) {
Ops.push_back(getValue(Arg));
@@ -4823,12 +4827,11 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
SDValue Result;
if (IsTgtIntrinsic) {
// This is target intrinsic that touches memory
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
Result =
DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
MachinePointerInfo(Info.ptrVal, Info.offset),
- Info.align, Info.flags, Info.size, AAInfo);
+ Info.align, Info.flags, Info.size,
+ I.getAAMetadata());
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
@@ -5510,12 +5513,12 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
// we've been asked to pursue.
auto MakeVRegDbgValue = [&](Register Reg, DIExpression *FragExpr,
bool Indirect) {
- if (Reg.isVirtual() && TM.Options.ValueTrackingVariableLocations) {
+ if (Reg.isVirtual() && MF.useDebugInstrRef()) {
// For VRegs, in instruction referencing mode, create a DBG_INSTR_REF
// pointing at the VReg, which will be patched up later.
auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF);
auto MIB = BuildMI(MF, DL, Inst);
- MIB.addReg(Reg, RegState::Debug);
+ MIB.addReg(Reg);
MIB.addImm(0);
MIB.addMetadata(Variable);
auto *NewDIExpr = FragExpr;
@@ -5637,7 +5640,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>>
SplitRegs) {
unsigned Offset = 0;
- for (auto RegAndSize : SplitRegs) {
+ for (const auto &RegAndSize : SplitRegs) {
// If the expression is already a fragment, the current register
// offset+size might extend beyond the fragment. In this case, only
// the register bits that are inside the fragment are relevant.
@@ -5866,12 +5869,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
/* AlwaysInline */ false, isTC,
MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)), AAInfo);
+ MachinePointerInfo(I.getArgOperand(1)),
+ I.getAAMetadata());
updateDAGForMaybeTailCall(MC);
return;
}
@@ -5889,12 +5891,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol,
/* AlwaysInline */ true, isTC,
MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)), AAInfo);
+ MachinePointerInfo(I.getArgOperand(1)),
+ I.getAAMetadata());
updateDAGForMaybeTailCall(MC);
return;
}
@@ -5908,10 +5909,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool isVol = MSI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC,
- MachinePointerInfo(I.getArgOperand(0)), AAInfo);
+ MachinePointerInfo(I.getArgOperand(0)),
+ I.getAAMetadata());
updateDAGForMaybeTailCall(MS);
return;
}
@@ -5929,11 +5929,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// FIXME: Support passing different dest/src alignments to the memmove DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)), AAInfo);
+ MachinePointerInfo(I.getArgOperand(1)),
+ I.getAAMetadata());
updateDAGForMaybeTailCall(MM);
return;
}
@@ -6124,7 +6123,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
if (Values.empty())
return;
- if (std::count(Values.begin(), Values.end(), nullptr))
+ if (llvm::is_contained(Values, nullptr))
return;
bool IsVariadic = DI.hasArgList();
@@ -6706,9 +6705,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::debugtrap:
case Intrinsic::trap: {
StringRef TrapFuncName =
- I.getAttributes()
- .getAttribute(AttributeList::FunctionIndex, "trap-func-name")
- .getValueAsString();
+ I.getAttributes().getFnAttr("trap-func-name").getValueAsString();
if (TrapFuncName.empty()) {
switch (Intrinsic) {
case Intrinsic::trap:
@@ -6888,7 +6885,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
// is the same on all targets.
- for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) {
+ for (unsigned Idx = 0, E = I.arg_size(); Idx < E; ++Idx) {
Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();
if (isa<ConstantPointerNull>(Arg))
continue; // Skip null pointers. They represent a hole in index space.
@@ -7058,7 +7055,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
};
SmallVector<BranchFunnelTarget, 8> Targets;
- for (unsigned Op = 1, N = I.getNumArgOperands(); Op != N; Op += 2) {
+ for (unsigned Op = 1, N = I.arg_size(); Op != N; Op += 2) {
auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
I.getArgOperand(Op), Offset, DAG.getDataLayout()));
if (ElemBase != Base)
@@ -7327,9 +7324,128 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
llvm_unreachable(
"Inconsistency: no SDNode available for this VPIntrinsic!");
+ if (*ResOPC == ISD::VP_REDUCE_SEQ_FADD ||
+ *ResOPC == ISD::VP_REDUCE_SEQ_FMUL) {
+ if (VPIntrin.getFastMathFlags().allowReassoc())
+ return *ResOPC == ISD::VP_REDUCE_SEQ_FADD ? ISD::VP_REDUCE_FADD
+ : ISD::VP_REDUCE_FMUL;
+ }
+
return ResOPC.getValue();
}
+void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
+ SmallVector<SDValue, 7> &OpValues,
+ bool isGather) {
+ SDLoc DL = getCurSDLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Value *PtrOperand = VPIntrin.getArgOperand(0);
+ MaybeAlign Alignment = DAG.getEVTAlign(VT);
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
+ SDValue LD;
+ bool AddToChain = true;
+ if (!isGather) {
+ // Do not serialize variable-length loads of constant memory with
+ // anything.
+ MemoryLocation ML;
+ if (VT.isScalableVector())
+ ML = MemoryLocation::getAfter(PtrOperand);
+ else
+ ML = MemoryLocation(
+ PtrOperand,
+ LocationSize::precise(
+ DAG.getDataLayout().getTypeStoreSize(VPIntrin.getType())),
+ AAInfo);
+ AddToChain = !AA || !AA->pointsToConstantMemory(ML);
+ SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+ VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges);
+ LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
+ MMO, false /*IsExpanding */);
+ } else {
+ unsigned AS =
+ PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(AS), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+ SDValue Base, Index, Scale;
+ ISD::MemIndexType IndexType;
+ bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
+ this, VPIntrin.getParent());
+ if (!UniformBase) {
+ Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ Index = getValue(PtrOperand);
+ IndexType = ISD::SIGNED_UNSCALED;
+ Scale =
+ DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+ EVT IdxVT = Index.getValueType();
+ EVT EltTy = IdxVT.getVectorElementType();
+ if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
+ EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
+ }
+ LD = DAG.getGatherVP(
+ DAG.getVTList(VT, MVT::Other), VT, DL,
+ {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO,
+ IndexType);
+ }
+ if (AddToChain)
+ PendingLoads.push_back(LD.getValue(1));
+ setValue(&VPIntrin, LD);
+}
+
+void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin,
+ SmallVector<SDValue, 7> &OpValues,
+ bool isScatter) {
+ SDLoc DL = getCurSDLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Value *PtrOperand = VPIntrin.getArgOperand(1);
+ EVT VT = OpValues[0].getValueType();
+ MaybeAlign Alignment = DAG.getEVTAlign(VT);
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ SDValue ST;
+ if (!isScatter) {
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
+ VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo);
+ ST =
+ DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], OpValues[1],
+ OpValues[2], OpValues[3], MMO, false /* IsTruncating */);
+ } else {
+ unsigned AS =
+ PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(AS), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo);
+ SDValue Base, Index, Scale;
+ ISD::MemIndexType IndexType;
+ bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
+ this, VPIntrin.getParent());
+ if (!UniformBase) {
+ Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ Index = getValue(PtrOperand);
+ IndexType = ISD::SIGNED_UNSCALED;
+ Scale =
+ DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+ EVT IdxVT = Index.getValueType();
+ EVT EltTy = IdxVT.getVectorElementType();
+ if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
+ EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
+ }
+ ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL,
+ {getMemoryRoot(), OpValues[0], Base, Index, Scale,
+ OpValues[2], OpValues[3]},
+ MMO, IndexType);
+ }
+ DAG.setRoot(ST);
+ setValue(&VPIntrin, ST);
+}
+
void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
const VPIntrinsic &VPIntrin) {
SDLoc DL = getCurSDLoc();
@@ -7349,15 +7465,29 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
// Request operands.
SmallVector<SDValue, 7> OpValues;
- for (unsigned I = 0; I < VPIntrin.getNumArgOperands(); ++I) {
+ for (unsigned I = 0; I < VPIntrin.arg_size(); ++I) {
auto Op = getValue(VPIntrin.getArgOperand(I));
if (I == EVLParamPos)
Op = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, Op);
OpValues.push_back(Op);
}
- SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);
- setValue(&VPIntrin, Result);
+ switch (Opcode) {
+ default: {
+ SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);
+ setValue(&VPIntrin, Result);
+ break;
+ }
+ case ISD::VP_LOAD:
+ case ISD::VP_GATHER:
+ visitVPLoadGather(VPIntrin, ValueVTs[0], OpValues,
+ Opcode == ISD::VP_GATHER);
+ break;
+ case ISD::VP_STORE:
+ case ISD::VP_SCATTER:
+ visitVPStoreScatter(VPIntrin, OpValues, Opcode == ISD::VP_SCATTER);
+ break;
+ }
}
SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain,
@@ -7760,12 +7890,11 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
// because the return pointer needs to be adjusted by the size of
// the copied memory.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false,
/*isTailCall=*/false,
MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)), AAInfo);
+ MachinePointerInfo(I.getArgOperand(1)),
+ I.getAAMetadata());
assert(MC.getNode() != nullptr &&
"** memcpy should not be lowered as TailCall in mempcpy context **");
DAG.setRoot(MC);
@@ -7918,6 +8047,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
}
if (Function *F = I.getCalledFunction()) {
+ diagnoseDontCall(I);
+
if (F->isDeclaration()) {
// Is this an LLVM intrinsic or a target-specific intrinsic?
unsigned IID = F->getIntrinsicID();
@@ -8176,7 +8307,7 @@ public:
}
}
- return TLI.getValueType(DL, OpTy, true);
+ return TLI.getAsmOperandValueType(DL, OpTy, true);
}
};
@@ -8261,9 +8392,10 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
///
/// OpInfo describes the operand
/// RefOpInfo describes the matching operand if any, the operand otherwise
-static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
- SDISelAsmOperandInfo &OpInfo,
- SDISelAsmOperandInfo &RefOpInfo) {
+static llvm::Optional<unsigned>
+getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
+ SDISelAsmOperandInfo &OpInfo,
+ SDISelAsmOperandInfo &RefOpInfo) {
LLVMContext &Context = *DAG.getContext();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -8273,7 +8405,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
// No work to do for memory operations.
if (OpInfo.ConstraintType == TargetLowering::C_Memory)
- return;
+ return None;
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
@@ -8283,7 +8415,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
&TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);
// RC is unset only on failure. Return immediately.
if (!RC)
- return;
+ return None;
// Get the actual register value type. This is important, because the user
// may have asked for (e.g.) the AX register in i32 type. We need to
@@ -8328,7 +8460,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
// No need to allocate a matching input constraint since the constraint it's
// matching to has already been allocated.
if (OpInfo.isMatchingInputConstraint())
- return;
+ return None;
EVT ValueVT = OpInfo.ConstraintVT;
if (OpInfo.ConstraintVT == MVT::Other)
@@ -8351,8 +8483,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
// Do not check for single registers.
if (AssignedReg) {
- for (; *I != AssignedReg; ++I)
- assert(I != RC->end() && "AssignedReg should be member of RC");
+ I = std::find(I, RC->end(), AssignedReg);
+ if (I == RC->end()) {
+ // RC does not contain the selected register, which indicates a
+ // mismatch between the register and the required type/bitwidth.
+ return {AssignedReg};
+ }
}
for (; NumRegs; --NumRegs, ++I) {
@@ -8362,6 +8498,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
}
OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+ return None;
}
static unsigned
@@ -8452,12 +8589,12 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
// Process the call argument. BasicBlocks are labels, currently appearing
// only in asm's.
if (isa<CallBrInst>(Call) &&
- ArgNo - 1 >= (cast<CallBrInst>(&Call)->getNumArgOperands() -
+ ArgNo - 1 >= (cast<CallBrInst>(&Call)->arg_size() -
cast<CallBrInst>(&Call)->getNumIndirectDests() -
NumMatchingOps) &&
(NumMatchingOps == 0 ||
- ArgNo - 1 < (cast<CallBrInst>(&Call)->getNumArgOperands() -
- NumMatchingOps))) {
+ ArgNo - 1 <
+ (cast<CallBrInst>(&Call)->arg_size() - NumMatchingOps))) {
const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal);
EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true);
OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT);
@@ -8479,8 +8616,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
DAG.getDataLayout(), STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
- OpInfo.ConstraintVT =
- TLI.getSimpleValueType(DAG.getDataLayout(), Call.getType());
+ OpInfo.ConstraintVT = TLI.getAsmOperandValueType(
+ DAG.getDataLayout(), Call.getType()).getSimpleVT();
}
++ResNo;
} else {
@@ -8595,7 +8732,18 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
OpInfo.isMatchingInputConstraint()
? ConstraintOperands[OpInfo.getMatchedOperand()]
: OpInfo;
- GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
+ const auto RegError =
+ getRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
+ if (RegError.hasValue()) {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const char *RegName = TRI.getName(RegError.getValue());
+ emitInlineAsmError(Call, "register '" + Twine(RegName) +
+ "' allocated for constraint '" +
+ Twine(OpInfo.ConstraintCode) +
+ "' does not match required type");
+ return;
+ }
auto DetectWriteToReservedRegister = [&]() {
const MachineFunction &MF = DAG.getMachineFunction();
@@ -8674,11 +8822,13 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- RegisterSDNode *R = dyn_cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]);
+ auto *R = cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]);
Register TiedReg = R->getReg();
MVT RegVT = R->getSimpleValueType(0);
- const TargetRegisterClass *RC = TiedReg.isVirtual() ?
- MRI.getRegClass(TiedReg) : TRI.getMinimalPhysRegClass(TiedReg);
+ const TargetRegisterClass *RC =
+ TiedReg.isVirtual() ? MRI.getRegClass(TiedReg)
+ : RegVT != MVT::Untyped ? TLI.getRegClassFor(RegVT)
+ : TRI.getMinimalPhysRegClass(TiedReg);
unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(MRI.createVirtualRegister(RC));
@@ -9317,7 +9467,7 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2;
- if (I.getNumArgOperands() > 1)
+ if (I.arg_size() > 1)
Op2 = getValue(I.getArgOperand(1));
SDLoc dl = getCurSDLoc();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
@@ -9671,9 +9821,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// if it isn't first piece, alignment must be 1
// For scalable vectors the scalable part is currently handled
// by individual targets, so we just use the known minimum size here.
- ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT,
- i < CLI.NumFixedArgs, i,
- j*Parts[j].getValueType().getStoreSize().getKnownMinSize());
+ ISD::OutputArg MyFlags(
+ Flags, Parts[j].getValueType().getSimpleVT(), VT,
+ i < CLI.NumFixedArgs, i,
+ j * Parts[j].getValueType().getStoreSize().getKnownMinSize());
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0) {
@@ -9841,10 +9992,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
None); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
- ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
- FuncInfo.PreferredExtendType.end())
- ? ISD::ANY_EXTEND
- : FuncInfo.PreferredExtendType[V];
+ ISD::NodeType ExtendType = ISD::ANY_EXTEND;
+ auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V);
+ if (PreferredExtendIt != FuncInfo.PreferredExtendType.end())
+ ExtendType = PreferredExtendIt->second;
RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
PendingExports.push_back(Chain);
}
@@ -10490,27 +10641,6 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
ConstantsOut.clear();
}
-/// Add a successor MBB to ParentMBB< creating a new MachineBB for BB if SuccMBB
-/// is 0.
-MachineBasicBlock *
-SelectionDAGBuilder::StackProtectorDescriptor::
-AddSuccessorMBB(const BasicBlock *BB,
- MachineBasicBlock *ParentMBB,
- bool IsLikely,
- MachineBasicBlock *SuccMBB) {
- // If SuccBB has not been created yet, create it.
- if (!SuccMBB) {
- MachineFunction *MF = ParentMBB->getParent();
- MachineFunction::iterator BBI(ParentMBB);
- SuccMBB = MF->CreateMachineBasicBlock(BB);
- MF->insert(++BBI, SuccMBB);
- }
- // Add it as a successor of ParentMBB.
- ParentMBB->addSuccessor(
- SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely));
- return SuccMBB;
-}
-
MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
MachineFunction::iterator I(MBB);
if (++I == FuncInfo.MF->end())
@@ -10675,12 +10805,10 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
}
}
- if (FallthroughUnreachable) {
- // Skip the range check if the fallthrough block is unreachable.
- JTH->OmitRangeCheck = true;
- }
+ if (FallthroughUnreachable)
+ JTH->FallthroughUnreachable = true;
- if (!JTH->OmitRangeCheck)
+ if (!JTH->FallthroughUnreachable)
addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
CurMBB->normalizeSuccProbs();
@@ -10718,10 +10846,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
BTB->DefaultProb -= DefaultProb / 2;
}
- if (FallthroughUnreachable) {
- // Skip the range check if the fallthrough block is unreachable.
- BTB->OmitRangeCheck = true;
- }
+ if (FallthroughUnreachable)
+ BTB->FallthroughUnreachable = true;
// If we're in the right place, emit the bit test header right now.
if (CurMBB == SwitchMBB) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index df5be156821f..d6122aa0a739 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
@@ -180,204 +181,6 @@ private:
SwitchCG::CaseClusterVector &Clusters,
BranchProbability &PeeledCaseProb);
- /// A class which encapsulates all of the information needed to generate a
- /// stack protector check and signals to isel via its state being initialized
- /// that a stack protector needs to be generated.
- ///
- /// *NOTE* The following is a high level documentation of SelectionDAG Stack
- /// Protector Generation. The reason that it is placed here is for a lack of
- /// other good places to stick it.
- ///
- /// High Level Overview of SelectionDAG Stack Protector Generation:
- ///
- /// Previously, generation of stack protectors was done exclusively in the
- /// pre-SelectionDAG Codegen LLVM IR Pass "Stack Protector". This necessitated
- /// splitting basic blocks at the IR level to create the success/failure basic
- /// blocks in the tail of the basic block in question. As a result of this,
- /// calls that would have qualified for the sibling call optimization were no
- /// longer eligible for optimization since said calls were no longer right in
- /// the "tail position" (i.e. the immediate predecessor of a ReturnInst
- /// instruction).
- ///
- /// Then it was noticed that since the sibling call optimization causes the
- /// callee to reuse the caller's stack, if we could delay the generation of
- /// the stack protector check until later in CodeGen after the sibling call
- /// decision was made, we get both the tail call optimization and the stack
- /// protector check!
- ///
- /// A few goals in solving this problem were:
- ///
- /// 1. Preserve the architecture independence of stack protector generation.
- ///
- /// 2. Preserve the normal IR level stack protector check for platforms like
- /// OpenBSD for which we support platform-specific stack protector
- /// generation.
- ///
- /// The main problem that guided the present solution is that one can not
- /// solve this problem in an architecture independent manner at the IR level
- /// only. This is because:
- ///
- /// 1. The decision on whether or not to perform a sibling call on certain
- /// platforms (for instance i386) requires lower level information
- /// related to available registers that can not be known at the IR level.
- ///
- /// 2. Even if the previous point were not true, the decision on whether to
- /// perform a tail call is done in LowerCallTo in SelectionDAG which
- /// occurs after the Stack Protector Pass. As a result, one would need to
- /// put the relevant callinst into the stack protector check success
- /// basic block (where the return inst is placed) and then move it back
- /// later at SelectionDAG/MI time before the stack protector check if the
- /// tail call optimization failed. The MI level option was nixed
- /// immediately since it would require platform-specific pattern
- /// matching. The SelectionDAG level option was nixed because
- /// SelectionDAG only processes one IR level basic block at a time
- /// implying one could not create a DAG Combine to move the callinst.
- ///
- /// To get around this problem a few things were realized:
- ///
- /// 1. While one can not handle multiple IR level basic blocks at the
- /// SelectionDAG Level, one can generate multiple machine basic blocks
- /// for one IR level basic block. This is how we handle bit tests and
- /// switches.
- ///
- /// 2. At the MI level, tail calls are represented via a special return
- /// MIInst called "tcreturn". Thus if we know the basic block in which we
- /// wish to insert the stack protector check, we get the correct behavior
- /// by always inserting the stack protector check right before the return
- /// statement. This is a "magical transformation" since no matter where
- /// the stack protector check intrinsic is, we always insert the stack
- /// protector check code at the end of the BB.
- ///
- /// Given the aforementioned constraints, the following solution was devised:
- ///
- /// 1. On platforms that do not support SelectionDAG stack protector check
- /// generation, allow for the normal IR level stack protector check
- /// generation to continue.
- ///
- /// 2. On platforms that do support SelectionDAG stack protector check
- /// generation:
- ///
- /// a. Use the IR level stack protector pass to decide if a stack
- /// protector is required/which BB we insert the stack protector check
- /// in by reusing the logic already therein. If we wish to generate a
- /// stack protector check in a basic block, we place a special IR
- /// intrinsic called llvm.stackprotectorcheck right before the BB's
- /// returninst or if there is a callinst that could potentially be
- /// sibling call optimized, before the call inst.
- ///
- /// b. Then when a BB with said intrinsic is processed, we codegen the BB
- /// normally via SelectBasicBlock. In said process, when we visit the
- /// stack protector check, we do not actually emit anything into the
- /// BB. Instead, we just initialize the stack protector descriptor
- /// class (which involves stashing information/creating the success
- /// mbbb and the failure mbb if we have not created one for this
- /// function yet) and export the guard variable that we are going to
- /// compare.
- ///
- /// c. After we finish selecting the basic block, in FinishBasicBlock if
- /// the StackProtectorDescriptor attached to the SelectionDAGBuilder is
- /// initialized, we produce the validation code with one of these
- /// techniques:
- /// 1) with a call to a guard check function
- /// 2) with inlined instrumentation
- ///
- /// 1) We insert a call to the check function before the terminator.
- ///
- /// 2) We first find a splice point in the parent basic block
- /// before the terminator and then splice the terminator of said basic
- /// block into the success basic block. Then we code-gen a new tail for
- /// the parent basic block consisting of the two loads, the comparison,
- /// and finally two branches to the success/failure basic blocks. We
- /// conclude by code-gening the failure basic block if we have not
- /// code-gened it already (all stack protector checks we generate in
- /// the same function, use the same failure basic block).
- class StackProtectorDescriptor {
- public:
- StackProtectorDescriptor() = default;
-
- /// Returns true if all fields of the stack protector descriptor are
- /// initialized implying that we should/are ready to emit a stack protector.
- bool shouldEmitStackProtector() const {
- return ParentMBB && SuccessMBB && FailureMBB;
- }
-
- bool shouldEmitFunctionBasedCheckStackProtector() const {
- return ParentMBB && !SuccessMBB && !FailureMBB;
- }
-
- /// Initialize the stack protector descriptor structure for a new basic
- /// block.
- void initialize(const BasicBlock *BB, MachineBasicBlock *MBB,
- bool FunctionBasedInstrumentation) {
- // Make sure we are not initialized yet.
- assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is "
- "already initialized!");
- ParentMBB = MBB;
- if (!FunctionBasedInstrumentation) {
- SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true);
- FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB);
- }
- }
-
- /// Reset state that changes when we handle different basic blocks.
- ///
- /// This currently includes:
- ///
- /// 1. The specific basic block we are generating a
- /// stack protector for (ParentMBB).
- ///
- /// 2. The successor machine basic block that will contain the tail of
- /// parent mbb after we create the stack protector check (SuccessMBB). This
- /// BB is visited only on stack protector check success.
- void resetPerBBState() {
- ParentMBB = nullptr;
- SuccessMBB = nullptr;
- }
-
- /// Reset state that only changes when we switch functions.
- ///
- /// This currently includes:
- ///
- /// 1. FailureMBB since we reuse the failure code path for all stack
- /// protector checks created in an individual function.
- ///
- /// 2.The guard variable since the guard variable we are checking against is
- /// always the same.
- void resetPerFunctionState() {
- FailureMBB = nullptr;
- }
-
- MachineBasicBlock *getParentMBB() { return ParentMBB; }
- MachineBasicBlock *getSuccessMBB() { return SuccessMBB; }
- MachineBasicBlock *getFailureMBB() { return FailureMBB; }
-
- private:
- /// The basic block for which we are generating the stack protector.
- ///
- /// As a result of stack protector generation, we will splice the
- /// terminators of this basic block into the successor mbb SuccessMBB and
- /// replace it with a compare/branch to the successor mbbs
- /// SuccessMBB/FailureMBB depending on whether or not the stack protector
- /// was violated.
- MachineBasicBlock *ParentMBB = nullptr;
-
- /// A basic block visited on stack protector check success that contains the
- /// terminators of ParentMBB.
- MachineBasicBlock *SuccessMBB = nullptr;
-
- /// This basic block visited on stack protector check failure that will
- /// contain a call to __stack_chk_fail().
- MachineBasicBlock *FailureMBB = nullptr;
-
- /// Add a successor machine basic block to ParentMBB. If the successor mbb
- /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic
- /// block will be created. Assign a large weight if IsLikely is true.
- MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB,
- MachineBasicBlock *ParentMBB,
- bool IsLikely,
- MachineBasicBlock *SuccMBB = nullptr);
- };
-
private:
const TargetMachine &TM;
@@ -764,6 +567,10 @@ private:
void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
+ void visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
+ SmallVector<SDValue, 7> &OpValues, bool isGather);
+ void visitVPStoreScatter(const VPIntrinsic &VPIntrin,
+ SmallVector<SDValue, 7> &OpValues, bool isScatter);
void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin);
void visitVAStart(const CallInst &I);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 40083c614a6c..77e9e53668f9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -146,9 +146,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
if (IID < Intrinsic::num_intrinsics)
return Intrinsic::getBaseName((Intrinsic::ID)IID).str();
- else if (!G)
+ if (!G)
return "Unknown intrinsic";
- else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
+ if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
return TII->getName(IID);
llvm_unreachable("Invalid intrinsic ID");
}
@@ -526,13 +526,13 @@ static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO,
if (G) {
const MachineFunction *MF = &G->getMachineFunction();
return printMemOperand(OS, MMO, MF, MF->getFunction().getParent(),
- &MF->getFrameInfo(), G->getSubtarget().getInstrInfo(),
- *G->getContext());
- } else {
- LLVMContext Ctx;
- return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr,
- /*MFI=*/nullptr, /*TII=*/nullptr, Ctx);
+ &MF->getFrameInfo(),
+ G->getSubtarget().getInstrInfo(), *G->getContext());
}
+
+ LLVMContext Ctx;
+ return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr,
+ /*MFI=*/nullptr, /*TII=*/nullptr, Ctx);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -948,17 +948,19 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G,
if (!Value.getNode()) {
OS << "<null>";
return false;
- } else if (shouldPrintInline(*Value.getNode(), G)) {
+ }
+
+ if (shouldPrintInline(*Value.getNode(), G)) {
OS << Value->getOperationName(G) << ':';
Value->print_types(OS, G);
Value->print_details(OS, G);
return true;
- } else {
- OS << PrintNodeId(*Value.getNode());
- if (unsigned RN = Value.getResNo())
- OS << ':' << RN;
- return false;
}
+
+ OS << PrintNodeId(*Value.getNode());
+ if (unsigned RN = Value.getResNo())
+ OS << ':' << RN;
+ return false;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1012,15 +1014,12 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
N->print(OS, G);
- if (depth < 1)
- return;
-
for (const SDValue &Op : N->op_values()) {
// Don't follow chain operands.
if (Op.getValueType() == MVT::Other)
continue;
OS << '\n';
- printrWithDepthHelper(OS, Op.getNode(), G, depth-1, indent+2);
+ printrWithDepthHelper(OS, Op.getNode(), G, depth - 1, indent + 2);
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 1415cce3b1df..c7e37cf8ca14 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -33,6 +33,7 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -575,7 +576,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
LiveInMap.insert(LI);
// Insert DBG_VALUE instructions for function arguments to the entry block.
- bool InstrRef = TM.Options.ValueTrackingVariableLocations;
+ bool InstrRef = MF->useDebugInstrRef();
for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
MachineInstr *MI = FuncInfo->ArgDbgValues[e - i - 1];
assert(MI->getOpcode() != TargetOpcode::DBG_VALUE_LIST &&
@@ -699,7 +700,7 @@ static void reportFastISelFailure(MachineFunction &MF,
R << (" (in function: " + MF.getName() + ")").str();
if (ShouldAbort)
- report_fatal_error(R.getMsg());
+ report_fatal_error(Twine(R.getMsg()));
ORE.emit(R);
}
@@ -798,7 +799,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
if (ViewDAGCombine1 && MatchFilterBB)
@@ -818,7 +819,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
// Second step, hack on the DAG until it only uses operations and types that
@@ -840,7 +841,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
// Only allow creation of legal node types.
@@ -864,7 +865,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
}
@@ -882,7 +883,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
{
@@ -898,7 +899,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
if (ViewDAGCombineLT && MatchFilterBB)
@@ -918,7 +919,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
}
@@ -938,7 +939,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
if (ViewDAGCombine2 && MatchFilterBB)
@@ -958,7 +959,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
if (OptLevel != CodeGenOpt::None)
@@ -1045,25 +1046,25 @@ public:
} // end anonymous namespace
// This function is used to enforce the topological node id property
-// property leveraged during Instruction selection. Before selection all
-// nodes are given a non-negative id such that all nodes have a larger id than
+// leveraged during instruction selection. Before the selection process all
+// nodes are given a non-negative id such that all nodes have a greater id than
// their operands. As this holds transitively we can prune checks that a node N
// is a predecessor of M another by not recursively checking through M's
-// operands if N's ID is larger than M's ID. This is significantly improves
-// performance of for various legality checks (e.g. IsLegalToFold /
-// UpdateChains).
+// operands if N's ID is larger than M's ID. This significantly improves
+// performance of various legality checks (e.g. IsLegalToFold / UpdateChains).
-// However, when we fuse multiple nodes into a single node
-// during selection we may induce a predecessor relationship between inputs and
-// outputs of distinct nodes being merged violating the topological property.
-// Should a fused node have a successor which has yet to be selected, our
-// legality checks would be incorrect. To avoid this we mark all unselected
-// sucessor nodes, i.e. id != -1 as invalid for pruning by bit-negating (x =>
+// However, when we fuse multiple nodes into a single node during the
+// selection we may induce a predecessor relationship between inputs and
+// outputs of distinct nodes being merged, violating the topological property.
+// Should a fused node have a successor which has yet to be selected,
+// our legality checks would be incorrect. To avoid this we mark all unselected
+// successor nodes, i.e. id != -1, as invalid for pruning by bit-negating (x =>
// (-(x+1))) the ids and modify our pruning check to ignore negative Ids of M.
// We use bit-negation to more clearly enforce that node id -1 can only be
-// achieved by selected nodes). As the conversion is reversable the original Id,
-// topological pruning can still be leveraged when looking for unselected nodes.
-// This method is call internally in all ISel replacement calls.
+// achieved by selected nodes. As the conversion is reversible to the original
+// Id, topological pruning can still be leveraged when looking for unselected
+// nodes. This method is called internally in all ISel replacement related
+// functions.
void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) {
SmallVector<SDNode *, 4> Nodes;
Nodes.push_back(Node);
@@ -1080,7 +1081,7 @@ void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) {
}
}
-// InvalidateNodeId - As discusses in EnforceNodeIdInvariant, mark a
+// InvalidateNodeId - As explained in EnforceNodeIdInvariant, mark a
// NodeId with the equivalent node id which is invalid for topological
// pruning.
void SelectionDAGISel::InvalidateNodeId(SDNode *N) {
@@ -1226,7 +1227,10 @@ static void mapWasmLandingPadIndex(MachineBasicBlock *MBB,
bool IsSingleCatchAllClause =
CPI->getNumArgOperands() == 1 &&
cast<Constant>(CPI->getArgOperand(0))->isNullValue();
- if (!IsSingleCatchAllClause) {
+ // catchpads for longjmp use an empty type list, e.g. catchpad within %0 []
+ // and they don't need LSDA info
+ bool IsCatchLongjmp = CPI->getNumArgOperands() == 0;
+ if (!IsSingleCatchAllClause && !IsCatchLongjmp) {
// Create a mapping from landing pad label to landing pad index.
bool IntrFound = false;
for (const User *U : CPI->users()) {
@@ -1644,114 +1648,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
SDB->SPDescriptor.resetPerFunctionState();
}
-/// Given that the input MI is before a partial terminator sequence TSeq, return
-/// true if M + TSeq also a partial terminator sequence.
-///
-/// A Terminator sequence is a sequence of MachineInstrs which at this point in
-/// lowering copy vregs into physical registers, which are then passed into
-/// terminator instructors so we can satisfy ABI constraints. A partial
-/// terminator sequence is an improper subset of a terminator sequence (i.e. it
-/// may be the whole terminator sequence).
-static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
- // If we do not have a copy or an implicit def, we return true if and only if
- // MI is a debug value.
- if (!MI.isCopy() && !MI.isImplicitDef())
- // Sometimes DBG_VALUE MI sneak in between the copies from the vregs to the
- // physical registers if there is debug info associated with the terminator
- // of our mbb. We want to include said debug info in our terminator
- // sequence, so we return true in that case.
- return MI.isDebugValue();
-
- // We have left the terminator sequence if we are not doing one of the
- // following:
- //
- // 1. Copying a vreg into a physical register.
- // 2. Copying a vreg into a vreg.
- // 3. Defining a register via an implicit def.
-
- // OPI should always be a register definition...
- MachineInstr::const_mop_iterator OPI = MI.operands_begin();
- if (!OPI->isReg() || !OPI->isDef())
- return false;
-
- // Defining any register via an implicit def is always ok.
- if (MI.isImplicitDef())
- return true;
-
- // Grab the copy source...
- MachineInstr::const_mop_iterator OPI2 = OPI;
- ++OPI2;
- assert(OPI2 != MI.operands_end()
- && "Should have a copy implying we should have 2 arguments.");
-
- // Make sure that the copy dest is not a vreg when the copy source is a
- // physical register.
- if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) &&
- Register::isPhysicalRegister(OPI2->getReg())))
- return false;
-
- return true;
-}
-
-/// Find the split point at which to splice the end of BB into its success stack
-/// protector check machine basic block.
-///
-/// On many platforms, due to ABI constraints, terminators, even before register
-/// allocation, use physical registers. This creates an issue for us since
-/// physical registers at this point can not travel across basic
-/// blocks. Luckily, selectiondag always moves physical registers into vregs
-/// when they enter functions and moves them through a sequence of copies back
-/// into the physical registers right before the terminator creating a
-/// ``Terminator Sequence''. This function is searching for the beginning of the
-/// terminator sequence so that we can ensure that we splice off not just the
-/// terminator, but additionally the copies that move the vregs into the
-/// physical registers.
-static MachineBasicBlock::iterator
-FindSplitPointForStackProtector(MachineBasicBlock *BB,
- const TargetInstrInfo &TII) {
- MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
- if (SplitPoint == BB->begin())
- return SplitPoint;
-
- MachineBasicBlock::iterator Start = BB->begin();
- MachineBasicBlock::iterator Previous = SplitPoint;
- --Previous;
-
- if (TII.isTailCall(*SplitPoint) &&
- Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) {
- // call itself, then we must insert before the sequence even starts. For
- // example:
- // <split point>
- // ADJCALLSTACKDOWN ...
- // <Moves>
- // ADJCALLSTACKUP ...
- // TAILJMP somewhere
- // On the other hand, it could be an unrelated call in which case this tail call
- // has to register moves of its own and should be the split point. For example:
- // ADJCALLSTACKDOWN
- // CALL something_else
- // ADJCALLSTACKUP
- // <split point>
- // TAILJMP somewhere
- do {
- --Previous;
- if (Previous->isCall())
- return SplitPoint;
- } while(Previous->getOpcode() != TII.getCallFrameSetupOpcode());
-
- return Previous;
- }
-
- while (MIIsInTerminatorSequence(*Previous)) {
- SplitPoint = Previous;
- if (Previous == Start)
- break;
- --Previous;
- }
-
- return SplitPoint;
-}
-
void
SelectionDAGISel::FinishBasicBlock() {
LLVM_DEBUG(dbgs() << "Total amount of phi nodes to update: "
@@ -1781,7 +1677,7 @@ SelectionDAGISel::FinishBasicBlock() {
// Add load and check to the basicblock.
FuncInfo->MBB = ParentMBB;
FuncInfo->InsertPt =
- FindSplitPointForStackProtector(ParentMBB, *TII);
+ findSplitPointForStackProtector(ParentMBB, *TII);
SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
@@ -1800,7 +1696,7 @@ SelectionDAGISel::FinishBasicBlock() {
// register allocation issues caused by us splitting the parent mbb. The
// register allocator will clean up said virtual copies later on.
MachineBasicBlock::iterator SplitPoint =
- FindSplitPointForStackProtector(ParentMBB, *TII);
+ findSplitPointForStackProtector(ParentMBB, *TII);
// Splice the terminator of ParentMBB into SuccessMBB.
SuccessMBB->splice(SuccessMBB->end(), ParentMBB,
@@ -1861,9 +1757,9 @@ SelectionDAGISel::FinishBasicBlock() {
// test, and delete the last bit test.
MachineBasicBlock *NextMBB;
- if (BTB.ContiguousRange && j + 2 == ej) {
- // Second-to-last bit-test with contiguous range: fall through to the
- // target of the final bit test.
+ if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
+ // Second-to-last bit-test with contiguous range or omitted range
+ // check: fall through to the target of the final bit test.
NextMBB = BTB.Cases[j + 1].TargetBB;
} else if (j + 1 == ej) {
// For the last bit test, fall through to Default.
@@ -1880,7 +1776,7 @@ SelectionDAGISel::FinishBasicBlock() {
SDB->clear();
CodeGenAndEmitDAG();
- if (BTB.ContiguousRange && j + 2 == ej) {
+ if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
// Since we're not going to use the final bit test, remove it.
BTB.Cases.pop_back();
break;
@@ -3800,7 +3696,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) {
else
Msg << "unknown intrinsic #" << iid;
}
- report_fatal_error(Msg.str());
+ report_fatal_error(Twine(Msg.str()));
}
char SelectionDAGISel::ID = 0;
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index a903c2401264..e2db9633bfb9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -1119,7 +1119,7 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
StatepointLoweringInfo SI(DAG);
unsigned ArgBeginIndex = Call->arg_begin() - Call->op_begin();
populateCallLoweringInfo(
- SI.CLI, Call, ArgBeginIndex, Call->getNumArgOperands(), Callee,
+ SI.CLI, Call, ArgBeginIndex, Call->arg_size(), Callee,
ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : Call->getType(),
false);
if (!VarArgDisallowed)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 1c1dae8f953f..e4a69adff05b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
@@ -537,7 +538,7 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
TargetLoweringOpt &TLO) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}
@@ -621,7 +622,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
}
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
AssumeSingleUse);
@@ -667,12 +668,12 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumDstEltBits / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
unsigned Offset = i * NumSrcEltBits;
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
- if (!Sub.isNullValue()) {
+ if (!Sub.isZero()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
if (DemandedElts[j])
@@ -690,8 +691,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumSrcEltBits / NumDstEltBits;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % Scale) * NumDstEltBits;
@@ -819,13 +820,21 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
break;
}
case ISD::INSERT_SUBVECTOR: {
- // If we don't demand the inserted subvector, return the base vector.
SDValue Vec = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
uint64_t Idx = Op.getConstantOperandVal(2);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
- if (DemandedElts.extractBits(NumSubElts, Idx) == 0)
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ // If we don't demand the inserted subvector, return the base vector.
+ if (DemandedSubElts == 0)
return Vec;
+ // If this simply widens the lowest subvector, see if we can do it earlier.
+ if (Idx == 0 && Vec.isUndef()) {
+ if (SDValue NewSub = SimplifyMultipleUseDemandedBits(
+ Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1))
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+ Op.getOperand(0), NewSub, Op.getOperand(2));
+ }
break;
}
case ISD::VECTOR_SHUFFLE: {
@@ -866,7 +875,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
unsigned Depth) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
Depth);
@@ -875,7 +884,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
unsigned Depth) const {
- APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
+ APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
Depth);
}
@@ -942,8 +951,8 @@ bool TargetLowering::SimplifyDemandedBits(
}
// If this is the root being simplified, allow it to have multiple uses,
// just set the DemandedBits/Elts to all bits.
- DemandedBits = APInt::getAllOnesValue(BitWidth);
- DemandedElts = APInt::getAllOnesValue(NumElts);
+ DemandedBits = APInt::getAllOnes(BitWidth);
+ DemandedElts = APInt::getAllOnes(NumElts);
} else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
// Not demanding any bits/elts from Op.
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
@@ -1038,7 +1047,7 @@ bool TargetLowering::SimplifyDemandedBits(
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
KnownBits KnownSub, KnownSrc;
if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
@@ -1056,8 +1065,8 @@ bool TargetLowering::SimplifyDemandedBits(
Known = KnownBits::commonBits(Known, KnownSrc);
// Attempt to avoid multi-use src if we don't need anything from it.
- if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() ||
- !DemandedSrcElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
+ !DemandedSrcElts.isAllOnes()) {
SDValue NewSub = SimplifyMultipleUseDemandedBits(
Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
SDValue NewSrc = SimplifyMultipleUseDemandedBits(
@@ -1086,7 +1095,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// Attempt to avoid multi-use src if we don't need anything from it.
- if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
if (DemandedSrc) {
@@ -1216,7 +1225,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
@@ -1263,7 +1272,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
@@ -1306,7 +1315,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
@@ -1351,8 +1360,7 @@ bool TargetLowering::SimplifyDemandedBits(
// If the RHS is a constant, see if we can change it. Don't alter a -1
// constant because that's a 'not' op, and that is better for combining
// and codegen.
- if (!C->isAllOnesValue() &&
- DemandedBits.isSubsetOf(C->getAPIntValue())) {
+ if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
// We're flipping all demanded bits. Flip the undemanded bits too.
SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
return TLO.CombineTo(Op, New);
@@ -1360,7 +1368,7 @@ bool TargetLowering::SimplifyDemandedBits(
}
// If we can't turn this into a 'not', try to shrink the constant.
- if (!C || !C->isAllOnesValue())
+ if (!C || !C->isAllOnes())
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
@@ -1605,7 +1613,7 @@ bool TargetLowering::SimplifyDemandedBits(
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
- if (DemandedBits.isOneValue())
+ if (DemandedBits.isOne())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
if (const APInt *SA =
@@ -1655,7 +1663,7 @@ bool TargetLowering::SimplifyDemandedBits(
Known.One.setHighBits(ShAmt);
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0) {
@@ -1781,7 +1789,7 @@ bool TargetLowering::SimplifyDemandedBits(
// If only 1 bit is demanded, replace with PARITY as long as we're before
// op legalization.
// FIXME: Limit to scalars for now.
- if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector())
+ if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
Op.getOperand(0)));
@@ -1795,9 +1803,9 @@ bool TargetLowering::SimplifyDemandedBits(
// If we only care about the highest bit, don't bother shifting right.
if (DemandedBits.isSignMask()) {
- unsigned NumSignBits =
- TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
- bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
+ unsigned MinSignedBits =
+ TLO.DAG.ComputeMinSignedBits(Op0, DemandedElts, Depth + 1);
+ bool AlreadySignExtended = ExVTBits >= MinSignedBits;
// However if the input is already sign extended we expect the sign
// extension to be dropped altogether later and do not simplify.
if (!AlreadySignExtended) {
@@ -2071,7 +2079,7 @@ bool TargetLowering::SimplifyDemandedBits(
// Demand the bits from every vector element without a constant index.
unsigned NumSrcElts = SrcEltCnt.getFixedValue();
- APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
if (CIdx->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
@@ -2087,8 +2095,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedSrcBits.isAllOnesValue() ||
- !DemandedSrcElts.isAllOnesValue()) {
+ if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
SDValue NewOp =
@@ -2138,12 +2145,12 @@ bool TargetLowering::SimplifyDemandedBits(
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = BitWidth / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
unsigned Offset = i * NumSrcEltBits;
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
- if (!Sub.isNullValue()) {
+ if (!Sub.isZero()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
if (DemandedElts[j])
@@ -2164,8 +2171,8 @@ bool TargetLowering::SimplifyDemandedBits(
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumSrcEltBits / BitWidth;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % Scale) * BitWidth;
@@ -2222,7 +2229,7 @@ bool TargetLowering::SimplifyDemandedBits(
}
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
@@ -2245,8 +2252,8 @@ bool TargetLowering::SimplifyDemandedBits(
// is probably not useful (and could be detrimental).
ConstantSDNode *C = isConstOrConstSplat(Op1);
APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
- if (C && !C->isAllOnesValue() && !C->isOne() &&
- (C->getAPIntValue() | HighMask).isAllOnesValue()) {
+ if (C && !C->isAllOnes() && !C->isOne() &&
+ (C->getAPIntValue() | HighMask).isAllOnes()) {
SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
// Disable the nsw and nuw flags. We can no longer guarantee that we
// won't wrap after simplification.
@@ -2344,7 +2351,7 @@ static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
return SDValue();
};
- APInt KnownUndef = APInt::getNullValue(NumElts);
+ APInt KnownUndef = APInt::getZero(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
// If both inputs for this element are either constant or undef and match
// the element type, compute the constant/undef result for this element of
@@ -2371,7 +2378,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
unsigned NumElts = DemandedElts.getBitWidth();
assert(VT.isVector() && "Expected vector op");
- KnownUndef = KnownZero = APInt::getNullValue(NumElts);
+ KnownUndef = KnownZero = APInt::getZero(NumElts);
// TODO: For now we assume we know nothing about scalable vectors.
if (VT.isScalableVector())
@@ -2463,17 +2470,13 @@ bool TargetLowering::SimplifyDemandedVectorElts(
return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
KnownZero, TLO, Depth + 1);
- APInt SrcZero, SrcUndef;
- APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
+ APInt SrcDemandedElts, SrcZero, SrcUndef;
// Bitcast from 'large element' src vector to 'small element' vector, we
// must demand a source element if any DemandedElt maps to it.
if ((NumElts % NumSrcElts) == 0) {
unsigned Scale = NumElts / NumSrcElts;
- for (unsigned i = 0; i != NumElts; ++i)
- if (DemandedElts[i])
- SrcDemandedElts.setBit(i / Scale);
-
+ SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
TLO, Depth + 1))
return true;
@@ -2483,7 +2486,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// TODO - bigendian once we have test coverage.
if (TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
- APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
+ APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Ofs = (i % Scale) * EltSizeInBits;
@@ -2513,10 +2516,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// of this vector.
if ((NumSrcElts % NumElts) == 0) {
unsigned Scale = NumSrcElts / NumElts;
- for (unsigned i = 0; i != NumElts; ++i)
- if (DemandedElts[i])
- SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
-
+ SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
TLO, Depth + 1))
return true;
@@ -2525,9 +2525,9 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// the output element will be as well, assuming it was demanded.
for (unsigned i = 0; i != NumElts; ++i) {
if (DemandedElts[i]) {
- if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
+ if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
KnownZero.setBit(i);
- if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
+ if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
KnownUndef.setBit(i);
}
}
@@ -2536,7 +2536,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
case ISD::BUILD_VECTOR: {
// Check all elements and simplify any unused elements with UNDEF.
- if (!DemandedElts.isAllOnesValue()) {
+ if (!DemandedElts.isAllOnes()) {
// Don't simplify BROADCASTS.
if (llvm::any_of(Op->op_values(),
[&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
@@ -2589,7 +2589,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
APInt SubUndef, SubZero;
if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
@@ -2609,8 +2609,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownZero.insertBits(SubZero, Idx);
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedSrcElts.isAllOnesValue() ||
- !DemandedSubElts.isAllOnesValue()) {
+ if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
Src, DemandedSrcElts, TLO.DAG, Depth + 1);
SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
@@ -2642,7 +2641,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownZero = SrcZero.extractBits(NumElts, Idx);
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedElts.isAllOnesValue()) {
+ if (!DemandedElts.isAllOnes()) {
SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
Src, DemandedSrcElts, TLO.DAG, Depth + 1);
if (NewSrc) {
@@ -2810,6 +2809,25 @@ bool TargetLowering::SimplifyDemandedVectorElts(
if (DemandedElts.isSubsetOf(KnownUndef))
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
KnownUndef.clearAllBits();
+
+ // zext - if we just need the bottom element then we can mask:
+ // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
+ if (DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian() &&
+ Src.getOpcode() == ISD::AND && Op->isOnlyUserOf(Src.getNode()) &&
+ Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
+ SDLoc DL(Op);
+ EVT SrcVT = Src.getValueType();
+ EVT SrcSVT = SrcVT.getScalarType();
+ SmallVector<SDValue> MaskElts;
+ MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
+ MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
+ SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
+ if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
+ ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
+ Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
+ }
+ }
}
break;
}
@@ -2842,7 +2860,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// Attempt to avoid multi-use ops if we don't need anything from them.
// TODO - use KnownUndef to relax the demandedelts?
- if (!DemandedElts.isAllOnesValue())
+ if (!DemandedElts.isAllOnes())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
break;
@@ -2869,7 +2887,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// Attempt to avoid multi-use ops if we don't need anything from them.
// TODO - use KnownUndef to relax the demandedelts?
- if (!DemandedElts.isAllOnesValue())
+ if (!DemandedElts.isAllOnes())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
break;
@@ -2897,7 +2915,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// Attempt to avoid multi-use ops if we don't need anything from them.
// TODO - use KnownUndef to relax the demandedelts?
- if (!DemandedElts.isAllOnesValue())
+ if (!DemandedElts.isAllOnes())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
break;
@@ -2923,7 +2941,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
return true;
} else {
KnownBits Known;
- APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
+ APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
TLO, Depth, AssumeSingleUse))
return true;
@@ -3111,9 +3129,9 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {
case UndefinedBooleanContent:
return CVal[0];
case ZeroOrOneBooleanContent:
- return CVal.isOneValue();
+ return CVal.isOne();
case ZeroOrNegativeOneBooleanContent:
- return CVal.isAllOnesValue();
+ return CVal.isAllOnes();
}
llvm_unreachable("Invalid boolean contents");
@@ -3140,7 +3158,7 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const {
if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
return !CN->getAPIntValue()[0];
- return CN->isNullValue();
+ return CN->isZero();
}
bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
@@ -3156,7 +3174,7 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
case TargetLowering::UndefinedBooleanContent:
case TargetLowering::ZeroOrNegativeOneBooleanContent:
- return N->isAllOnesValue() && SExt;
+ return N->isAllOnes() && SExt;
}
llvm_unreachable("Unexpected enumeration.");
}
@@ -3210,7 +3228,7 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
// Bail out if the compare operand that we want to turn into a zero is
// already a zero (otherwise, infinite loop).
auto *YConst = dyn_cast<ConstantSDNode>(Y);
- if (YConst && YConst->isNullValue())
+ if (YConst && YConst->isZero())
return SDValue();
// Transform this into: ~X & Y == 0.
@@ -3325,7 +3343,7 @@ SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
DAGCombinerInfo &DCI, const SDLoc &DL) const {
assert(isConstOrConstSplat(N1C) &&
- isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
+ isConstOrConstSplat(N1C)->getAPIntValue().isZero() &&
"Should be a comparison with 0.");
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
"Valid only for [in]equality comparisons.");
@@ -3548,7 +3566,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
// equality comparison, then we're just comparing whether X itself is
// zero.
- if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
+ if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
N0.getOperand(0).getOpcode() == ISD::CTLZ &&
isPowerOf2_32(N0.getScalarValueSizeInBits())) {
if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
@@ -3648,8 +3666,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
(isConstFalseVal(N1C) ||
isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
- bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
- (!N1C->isNullValue() && Cond == ISD::SETNE);
+ bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
+ (!N1C->isZero() && Cond == ISD::SETNE);
if (!Inverse)
return TopSetCC;
@@ -3800,8 +3818,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Otherwise, make this a use of a zext.
return DAG.getSetCC(dl, VT, ZextOp,
DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
- } else if ((N1C->isNullValue() || N1C->isOne()) &&
- (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ } else if ((N1C->isZero() || N1C->isOne()) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
// SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
if (N0.getOpcode() == ISD::SETCC &&
isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
@@ -3894,7 +3912,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// icmp eq/ne (urem %x, %y), 0
// Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
// icmp eq/ne %x, 0
- if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
+ if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
@@ -3902,6 +3920,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
}
+ // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
+ // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
+ N1C && N1C->isAllOnes()) {
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(0, dl, OpVT),
+ Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
+ }
+
if (SDValue V =
optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
return V;
@@ -4001,7 +4030,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
- if (C1.isNullValue())
+ if (C1.isZero())
if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
VT, N0, N1, Cond, DCI, dl))
return CC;
@@ -4010,8 +4039,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// For example, when high 32-bits of i64 X are known clear:
// all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
// all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
- bool CmpZero = N1C->getAPIntValue().isNullValue();
- bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue();
+ bool CmpZero = N1C->getAPIntValue().isZero();
+ bool CmpNegOne = N1C->getAPIntValue().isAllOnes();
if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
// Match or(lo,shl(hi,bw/2)) pattern.
auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
@@ -4140,7 +4169,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
const APInt &AndRHSC = AndRHS->getAPIntValue();
- if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
+ if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
unsigned ShiftBits = AndRHSC.countTrailingZeros();
if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
SDValue Shift =
@@ -4336,7 +4365,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// When division is cheap or optimizing for minimum size,
// fall through to DIVREM creation by skipping this fold.
- if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
+ if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
if (N0.getOpcode() == ISD::UREM) {
if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
return Folded;
@@ -4687,7 +4716,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
getSimpleValueType(DL, STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
- OpInfo.ConstraintVT = getSimpleValueType(DL, Call.getType());
+ OpInfo.ConstraintVT =
+ getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
}
++ResNo;
break;
@@ -5049,7 +5079,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
SmallVector<SDValue, 16> Shifts, Factors;
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
- if (C->isNullValue())
+ if (C->isZero())
return false;
APInt Divisor = C->getAPIntValue();
unsigned Shift = Divisor.countTrailingZeros();
@@ -5151,31 +5181,31 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
- if (C->isNullValue())
+ if (C->isZero())
return false;
const APInt &Divisor = C->getAPIntValue();
- APInt::ms magics = Divisor.magic();
+ SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
int NumeratorFactor = 0;
int ShiftMask = -1;
- if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
+ if (Divisor.isOne() || Divisor.isAllOnes()) {
// If d is +1/-1, we just multiply the numerator by +1/-1.
NumeratorFactor = Divisor.getSExtValue();
- magics.m = 0;
- magics.s = 0;
+ magics.Magic = 0;
+ magics.ShiftAmount = 0;
ShiftMask = 0;
- } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
+ } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
// If d > 0 and m < 0, add the numerator.
NumeratorFactor = 1;
- } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
+ } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
// If d < 0 and m > 0, subtract the numerator.
NumeratorFactor = -1;
}
- MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
+ MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
- Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
+ Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
return true;
};
@@ -5296,33 +5326,33 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
- if (C->isNullValue())
+ if (C->isZero())
return false;
// FIXME: We should use a narrower constant when the upper
// bits are known to be zero.
const APInt& Divisor = C->getAPIntValue();
- APInt::mu magics = Divisor.magicu();
+ UnsignedDivisonByConstantInfo magics = UnsignedDivisonByConstantInfo::get(Divisor);
unsigned PreShift = 0, PostShift = 0;
// If the divisor is even, we can avoid using the expensive fixup by
// shifting the divided value upfront.
- if (magics.a != 0 && !Divisor[0]) {
+ if (magics.IsAdd != 0 && !Divisor[0]) {
PreShift = Divisor.countTrailingZeros();
// Get magic number for the shifted divisor.
- magics = Divisor.lshr(PreShift).magicu(PreShift);
- assert(magics.a == 0 && "Should use cheap fixup now");
+ magics = UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
+ assert(magics.IsAdd == 0 && "Should use cheap fixup now");
}
- APInt Magic = magics.m;
+ APInt Magic = magics.Magic;
unsigned SelNPQ;
- if (magics.a == 0 || Divisor.isOneValue()) {
- assert(magics.s < Divisor.getBitWidth() &&
+ if (magics.IsAdd == 0 || Divisor.isOne()) {
+ assert(magics.ShiftAmount < Divisor.getBitWidth() &&
"We shouldn't generate an undefined shift!");
- PostShift = magics.s;
+ PostShift = magics.ShiftAmount;
SelNPQ = false;
} else {
- PostShift = magics.s - 1;
+ PostShift = magics.ShiftAmount - 1;
SelNPQ = true;
}
@@ -5330,7 +5360,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
NPQFactors.push_back(
DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
- : APInt::getNullValue(EltBits),
+ : APInt::getZero(EltBits),
dl, SVT));
PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
UseNPQ |= SelNPQ;
@@ -5510,13 +5540,13 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
// Division by 0 is UB. Leave it to be constant-folded elsewhere.
- if (CDiv->isNullValue())
+ if (CDiv->isZero())
return false;
const APInt &D = CDiv->getAPIntValue();
const APInt &Cmp = CCmp->getAPIntValue();
- ComparingWithAllZeros &= Cmp.isNullValue();
+ ComparingWithAllZeros &= Cmp.isZero();
// x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
// if C2 is not less than C1, the comparison is always false.
@@ -5528,26 +5558,26 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
// If all lanes are tautological (either all divisors are ones, or divisor
// is not greater than the constant we are comparing with),
// we will prefer to avoid the fold.
- bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;
+ bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
HadTautologicalLanes |= TautologicalLane;
AllLanesAreTautological &= TautologicalLane;
// If we are comparing with non-zero, we need'll need to subtract said
// comparison value from the LHS. But there is no point in doing that if
// every lane where we are comparing with non-zero is tautological..
- if (!Cmp.isNullValue())
+ if (!Cmp.isZero())
AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
// Decompose D into D0 * 2^K
unsigned K = D.countTrailingZeros();
- assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
+ assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
APInt D0 = D.lshr(K);
// D is even if it has trailing zeros.
HadEvenDivisor |= (K != 0);
// D is a power-of-two if D0 is one.
// If all divisors are power-of-two, we will prefer to avoid the fold.
- AllDivisorsArePowerOfTwo &= D0.isOneValue();
+ AllDivisorsArePowerOfTwo &= D0.isOne();
// P = inv(D0, 2^W)
// 2^W requires W + 1 bits, so we have to extend and then truncate.
@@ -5555,20 +5585,20 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
APInt P = D0.zext(W + 1)
.multiplicativeInverse(APInt::getSignedMinValue(W + 1))
.trunc(W);
- assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
- assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+ assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOne() && "Multiplicative inverse sanity check.");
// Q = floor((2^W - 1) u/ D)
// R = ((2^W - 1) u% D)
APInt Q, R;
- APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);
+ APInt::udivrem(APInt::getAllOnes(W), D, Q, R);
// If we are comparing with zero, then that comparison constant is okay,
// else it may need to be one less than that.
if (Cmp.ugt(R))
Q -= 1;
- assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
+ assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
"We are expecting that K is always less than all-ones for ShSVT");
// If the lane is tautological the result can be constant-folded.
@@ -5751,7 +5781,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// TODO: Could support comparing with non-zero too.
ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
- if (!CompTarget || !CompTarget->isNullValue())
+ if (!CompTarget || !CompTarget->isZero())
return SDValue();
bool HadIntMinDivisor = false;
@@ -5764,7 +5794,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
auto BuildSREMPattern = [&](ConstantSDNode *C) {
// Division by 0 is UB. Leave it to be constant-folded elsewhere.
- if (C->isNullValue())
+ if (C->isZero())
return false;
// FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
@@ -5777,12 +5807,12 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
HadIntMinDivisor |= D.isMinSignedValue();
// If all divisors are ones, we will prefer to avoid the fold.
- HadOneDivisor |= D.isOneValue();
- AllDivisorsAreOnes &= D.isOneValue();
+ HadOneDivisor |= D.isOne();
+ AllDivisorsAreOnes &= D.isOne();
// Decompose D into D0 * 2^K
unsigned K = D.countTrailingZeros();
- assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
+ assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
APInt D0 = D.lshr(K);
if (!D.isMinSignedValue()) {
@@ -5793,7 +5823,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// D is a power-of-two if D0 is one. This includes INT_MIN.
// If all divisors are power-of-two, we will prefer to avoid the fold.
- AllDivisorsArePowerOfTwo &= D0.isOneValue();
+ AllDivisorsArePowerOfTwo &= D0.isOne();
// P = inv(D0, 2^W)
// 2^W requires W + 1 bits, so we have to extend and then truncate.
@@ -5801,8 +5831,8 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
APInt P = D0.zext(W + 1)
.multiplicativeInverse(APInt::getSignedMinValue(W + 1))
.trunc(W);
- assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
- assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+ assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOne() && "Multiplicative inverse sanity check.");
// A = floor((2^(W - 1) - 1) / D0) & -2^K
APInt A = APInt::getSignedMaxValue(W).udiv(D0);
@@ -5817,14 +5847,14 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// Q = floor((2 * A) / (2^K))
APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
- assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
+ assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
"We are expecting that A is always less than all-ones for SVT");
- assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
+ assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
"We are expecting that K is always less than all-ones for ShSVT");
// If the divisor is 1 the result can be constant-folded. Likewise, we
// don't care about INT_MIN lanes, those can be set to undef if appropriate.
- if (D.isOneValue()) {
+ if (D.isOne()) {
// Set P, A and K to a bogus values so we can try to splat them.
P = 0;
A = -1;
@@ -5950,7 +5980,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue IntMax = DAG.getConstant(
APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
SDValue Zero =
- DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);
+ DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);
// Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
@@ -6776,7 +6806,7 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
// the destination signmask can't be represented by the float, so we can
// just use FP_TO_SINT directly.
const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
- APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
+ APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
if (APFloat::opOverflow &
APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
@@ -6969,8 +6999,18 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
return SDValue();
}
-bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
- SelectionDAG &DAG) const {
+// Only expand vector types if we have the appropriate vector bit operations.
+static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
+ assert(VT.isVector() && "Expected vector type");
+ unsigned Len = VT.getScalarSizeInBits();
+ return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
+ (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
+}
+
+SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
@@ -6980,15 +7020,11 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
// TODO: Add support for irregular type lengths.
if (!(Len <= 128 && Len % 8 == 0))
- return false;
+ return SDValue();
// Only expand vector types if we have the appropriate vector bit operations.
- if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
- !isOperationLegalOrCustom(ISD::SUB, VT) ||
- !isOperationLegalOrCustom(ISD::SRL, VT) ||
- (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
- !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
- return false;
+ if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
+ return SDValue();
// This is the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
@@ -7025,12 +7061,10 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
DAG.getConstant(Len - 8, dl, ShVT));
- Result = Op;
- return true;
+ return Op;
}
-bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
- SelectionDAG &DAG) const {
+SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
@@ -7039,10 +7073,8 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
// If the non-ZERO_UNDEF version is supported we can use that instead.
if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
- isOperationLegalOrCustom(ISD::CTLZ, VT)) {
- Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
- return true;
- }
+ isOperationLegalOrCustom(ISD::CTLZ, VT))
+ return DAG.getNode(ISD::CTLZ, dl, VT, Op);
// If the ZERO_UNDEF version is supported use that and handle the zero case.
if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
@@ -7051,17 +7083,18 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
- Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ return DAG.getSelect(dl, VT, SrcIsZero,
DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
- return true;
}
// Only expand vector types if we have the appropriate vector bit operations.
+ // This includes the operations needed to expand CTPOP if it isn't supported.
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
- !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
+ (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ !canExpandVectorCTPOP(*this, VT)) ||
!isOperationLegalOrCustom(ISD::SRL, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
- return false;
+ return SDValue();
// for now, we do this:
// x = x | (x >> 1);
@@ -7078,12 +7111,10 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
}
Op = DAG.getNOT(dl, Op, VT);
- Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
- return true;
+ return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
-bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
- SelectionDAG &DAG) const {
+SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
SDValue Op = Node->getOperand(0);
@@ -7091,10 +7122,8 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
// If the non-ZERO_UNDEF version is supported we can use that instead.
if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
- isOperationLegalOrCustom(ISD::CTTZ, VT)) {
- Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
- return true;
- }
+ isOperationLegalOrCustom(ISD::CTTZ, VT))
+ return DAG.getNode(ISD::CTTZ, dl, VT, Op);
// If the ZERO_UNDEF version is supported use that and handle the zero case.
if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
@@ -7103,19 +7132,20 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
- Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ return DAG.getSelect(dl, VT, SrcIsZero,
DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
- return true;
}
// Only expand vector types if we have the appropriate vector bit operations.
+ // This includes the operations needed to expand CTPOP if it isn't supported.
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
(!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
- !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
+ !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
+ !canExpandVectorCTPOP(*this, VT)) ||
!isOperationLegalOrCustom(ISD::SUB, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
- return false;
+ return SDValue();
// for now, we use: { return popcount(~x & (x - 1)); }
// unless the target has ctlz but not ctpop, in which case we use:
@@ -7127,18 +7157,15 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
// If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
- Result =
- DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
- DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
- return true;
+ return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
+ DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
}
- Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
- return true;
+ return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
}
-bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
- SelectionDAG &DAG, bool IsNegative) const {
+SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
+ bool IsNegative) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
@@ -7148,27 +7175,24 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMAX, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
- Result = DAG.getNode(ISD::SMAX, dl, VT, Op,
- DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
- return true;
+ return DAG.getNode(ISD::SMAX, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
// abs(x) -> umin(x,sub(0,x))
if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::UMIN, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
- Result = DAG.getNode(ISD::UMIN, dl, VT, Op,
- DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
- return true;
+ return DAG.getNode(ISD::UMIN, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
// 0 - abs(x) -> smin(x, sub(0,x))
if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMIN, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
- Result = DAG.getNode(ISD::SMIN, dl, VT, Op,
- DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
- return true;
+ return DAG.getNode(ISD::SMIN, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
// Only expand vector types if we have the appropriate vector operations.
@@ -7177,20 +7201,19 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
(!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
(IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
- return false;
+ return SDValue();
SDValue Shift =
DAG.getNode(ISD::SRA, dl, VT, Op,
DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
if (!IsNegative) {
SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
- Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
- } else {
- // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
- SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
- Result = DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
+ return DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
}
- return true;
+
+ // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
+ return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
}
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
@@ -7265,34 +7288,31 @@ SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
// TODO: We can easily support i4/i2 legal types if any target ever does.
if (Sz >= 8 && isPowerOf2_32(Sz)) {
// Create the masks - repeating the pattern every byte.
- APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0));
- APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC));
- APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA));
- APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F));
- APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33));
- APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55));
+ APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
+ APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
+ APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
// BSWAP if the type is wider than a single byte.
Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
- // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4)
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT));
+ // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
- // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2)
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT));
+ // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
- // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1)
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT));
+ // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
return Tmp;
@@ -7802,13 +7822,15 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
EVT VecVT, const SDLoc &dl,
- unsigned NumSubElts) {
- if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx))
- return Idx;
+ ElementCount SubEC) {
+ assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
+ "Cannot index a scalable vector within a fixed-width vector");
- EVT IdxVT = Idx.getValueType();
unsigned NElts = VecVT.getVectorMinNumElements();
- if (VecVT.isScalableVector()) {
+ unsigned NumSubElts = SubEC.getKnownMinValue();
+ EVT IdxVT = Idx.getValueType();
+
+ if (VecVT.isScalableVector() && !SubEC.isScalable()) {
// If this is a constant index and we know the value plus the number of the
// elements in the subvector minus one is less than the minimum number of
// elements then it's safe to return Idx.
@@ -7855,16 +7877,16 @@ SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
"Converting bits to bytes lost precision");
-
- // Scalable vectors don't need clamping as these are checked at compile time
- if (SubVecVT.isFixedLengthVector()) {
- assert(SubVecVT.getVectorElementType() == EltVT &&
- "Sub-vector must be a fixed vector with matching element type");
- Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
- SubVecVT.getVectorNumElements());
- }
+ assert(SubVecVT.getVectorElementType() == EltVT &&
+ "Sub-vector must be a vector with matching element type");
+ Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
+ SubVecVT.getVectorElementCount());
EVT IdxVT = Index.getValueType();
+ if (SubVecVT.isScalableVector())
+ Index =
+ DAG.getNode(ISD::MUL, dl, IdxVT, Index,
+ DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
DAG.getConstant(EltSize, dl, IdxVT));
@@ -7920,7 +7942,7 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDLoc dl(Op);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- if (C->isNullValue() && CC == ISD::SETEQ) {
+ if (C->isZero() && CC == ISD::SETEQ) {
EVT VT = Op.getOperand(0).getValueType();
SDValue Zext = Op.getOperand(0);
if (VT.bitsLT(MVT::i32)) {
@@ -7948,10 +7970,8 @@ TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT,
(IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED);
// Scaling is unimportant for bytes, canonicalize to unscaled.
- if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) {
- IsScaledIndex = false;
- IndexType = IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;
- }
+ if (IsScaledIndex && MemVT.getScalarType() == MVT::i8)
+ return IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;
return IndexType;
}
@@ -8072,14 +8092,12 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
}
- // SatMax -> Overflow && SumDiff < 0
- // SatMin -> Overflow && SumDiff >= 0
+ // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
APInt MinVal = APInt::getSignedMinValue(BitWidth);
- APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
- SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
- SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
- Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
+ SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
+ DAG.getConstant(BitWidth - 1, dl, VT));
+ Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
@@ -8154,8 +8172,11 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
APInt MaxVal = APInt::getSignedMaxValue(VTSize);
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
- SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
- Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+ // Xor the inputs, if resulting sign bit is 0 the product will be
+ // positive, else negative.
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
return DAG.getSelect(dl, VT, Overflow, Result, Product);
} else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
SDValue Result =
@@ -8390,7 +8411,7 @@ void TargetLowering::expandSADDSUBO(
// If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
- if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
+ if (isOperationLegal(OpcSat, LHS.getValueType())) {
SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
@@ -8443,8 +8464,8 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
if (VT.isVector())
- WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
- VT.getVectorNumElements());
+ WideVT =
+ EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
SDValue BottomHalf;
SDValue TopHalf;