Vendor import of llvm-project main llvmorg-14-init-10186-gff7f2cfa959b. - src

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2021-11-19 20:06:13 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2021-11-19 20:06:13 +0000
commit	c0981da47d5696fe36474fcf86b4ce03ae3ff818 (patch)
tree	f42add1021b9f2ac6a69ac7cf6c4499962739a45 /llvm/lib/CodeGen/SelectionDAG
parent	344a3780b2e33f6ca763666c380202b18aab72a3 (diff)

vendor/llvm-project/llvmorg-14-init-10186-gff7f2cfa959b

Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

959

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/FastISel.cpp

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

505

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

420

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

918

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

426

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h

203

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp

180

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp

-rw-r--r--

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

481

22 files changed, 2850 insertions, 1568 deletions

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b104e995019f..ce400ea43f29 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

@@ -129,12 +129,12 @@ static cl::opt<unsigned> StoreMergeDependenceLimit(

static cl::opt<bool> EnableReduceLoadOpStoreWidth(

"combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),

- cl::desc("DAG cominber enable reducing the width of load/op/store "

+ cl::desc("DAG combiner enable reducing the width of load/op/store "

"sequence"));

static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(

"combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),

- cl::desc("DAG cominber enable load/<replace bytes>/store with "

+ cl::desc("DAG combiner enable load/<replace bytes>/store with "

"a narrower store"));

namespace {

@@ -319,7 +319,7 @@ namespace {

/// If so, return true.

bool SimplifyDemandedBits(SDValue Op) {

unsigned BitWidth = Op.getScalarValueSizeInBits();

- APInt DemandedBits = APInt::getAllOnesValue(BitWidth);

+ APInt DemandedBits = APInt::getAllOnes(BitWidth);

return SimplifyDemandedBits(Op, DemandedBits);

}

@@ -345,7 +345,7 @@ namespace {

return false;

unsigned NumElts = Op.getValueType().getVectorNumElements();

- APInt DemandedElts = APInt::getAllOnesValue(NumElts);

+ APInt DemandedElts = APInt::getAllOnes(NumElts);

return SimplifyDemandedVectorElts(Op, DemandedElts);

}

@@ -436,7 +436,7 @@ namespace {

SDValue visitOR(SDNode *N);

SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);

SDValue visitXOR(SDNode *N);

- SDValue SimplifyVBinOp(SDNode *N);

+ SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);

SDValue visitSHL(SDNode *N);

SDValue visitSRA(SDNode *N);

SDValue visitSRL(SDNode *N);

@@ -515,6 +515,7 @@ namespace {

SDValue visitFP_TO_FP16(SDNode *N);

SDValue visitFP16_TO_FP(SDNode *N);

SDValue visitVECREDUCE(SDNode *N);

+ SDValue visitVPOp(SDNode *N);

SDValue visitFADDForFMACombine(SDNode *N);

SDValue visitFSUBForFMACombine(SDNode *N);

@@ -615,7 +616,7 @@ namespace {

SmallVectorImpl<SDValue> &Aliases);

/// Return true if there is any possibility that the two addresses overlap.

- bool isAlias(SDNode *Op0, SDNode *Op1) const;

+ bool mayAlias(SDNode *Op0, SDNode *Op1) const;

/// Walk up chain skipping non-aliasing memory nodes, looking for a better

/// chain (aliasing node.)

@@ -1062,21 +1063,22 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,

if (N0.getOpcode() != Opc)

return SDValue();

- if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {

- if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {

+ SDValue N00 = N0.getOperand(0);

+ SDValue N01 = N0.getOperand(1);

+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {

+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {

// Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))

- if (SDValue OpNode =

- DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))

- return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);

+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))

+ return DAG.getNode(Opc, DL, VT, N00, OpNode);

return SDValue();

}

if (N0.hasOneUse()) {

// Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)

// iff (op x, c1) has one use

- SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);

- if (!OpNode.getNode())

- return SDValue();

- return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));

+ if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1))

+ return DAG.getNode(Opc, DL, VT, OpNode, N01);

+ return SDValue();

}

return SDValue();

@@ -1738,6 +1740,9 @@ SDValue DAGCombiner::visit(SDNode *N) {

case ISD::VECREDUCE_UMIN:

case ISD::VECREDUCE_FMAX:

case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);

+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:

+#include "llvm/IR/VPIntrinsics.def"

+ return visitVPOp(N);

}

return SDValue();

}

@@ -2257,7 +2262,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {

// fold vector ops

if (VT.isVector()) {

- if (SDValue FoldedVOp = SimplifyVBinOp(N))

+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))

return FoldedVOp;

// fold (add x, 0) -> x, vector edition

@@ -2439,9 +2444,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {

N0.getOperand(0));

// fold (add (add (xor a, -1), b), 1) -> (sub b, a)

- if (N0.getOpcode() == ISD::ADD ||

- N0.getOpcode() == ISD::UADDO ||

- N0.getOpcode() == ISD::SADDO) {

+ if (N0.getOpcode() == ISD::ADD) {

SDValue A, Xor;

if (isBitwiseNot(N0.getOperand(0))) {

@@ -2783,7 +2786,7 @@ static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,

IsFlip = Const->isOne();

break;

case TargetLowering::ZeroOrNegativeOneBooleanContent:

- IsFlip = Const->isAllOnesValue();

+ IsFlip = Const->isAllOnes();

break;

case TargetLowering::UndefinedBooleanContent:

IsFlip = (Const->getAPIntValue() & 0x01) == 1;

@@ -3259,7 +3262,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {

// fold vector ops

if (VT.isVector()) {

- if (SDValue FoldedVOp = SimplifyVBinOp(N))

+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))

return FoldedVOp;

// fold (sub x, 0) -> x, vector edition

@@ -3317,11 +3320,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {

}

// Convert 0 - abs(x).

- SDValue Result;

if (N1->getOpcode() == ISD::ABS &&

- !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&

- TLI.expandABS(N1.getNode(), Result, DAG, true))

- return Result;

+ !TLI.isOperationLegalOrCustom(ISD::ABS, VT))

+ if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))

+ return Result;

// Fold neg(splat(neg(x)) -> splat(x)

if (VT.isVector()) {

@@ -3785,7 +3787,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {

// fold vector ops

if (VT.isVector()) {

- if (SDValue FoldedVOp = SimplifyVBinOp(N))

+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))

return FoldedVOp;

N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);

@@ -3810,18 +3812,18 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {

return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);

// fold (mul x, 0) -> 0

- if (N1IsConst && ConstValue1.isNullValue())

+ if (N1IsConst && ConstValue1.isZero())

return N1;

// fold (mul x, 1) -> x

- if (N1IsConst && ConstValue1.isOneValue())

+ if (N1IsConst && ConstValue1.isOne())

return N0;

if (SDValue NewSel = foldBinOpIntoSelect(N))

return NewSel;

// fold (mul x, -1) -> 0-x

- if (N1IsConst && ConstValue1.isAllOnesValue()) {

+ if (N1IsConst && ConstValue1.isAllOnes()) {

SDLoc DL(N);

return DAG.getNode(ISD::SUB, DL, VT,

DAG.getConstant(0, DL, VT), N0);

@@ -3839,7 +3841,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {

}

// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c

- if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {

+ if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {

unsigned Log2Val = (-ConstValue1).logBase2();

SDLoc DL(N);

// FIXME: If the input is something that is easily negated (e.g. a

@@ -3968,7 +3970,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {

SmallBitVector ClearMask;

ClearMask.reserve(NumElts);

auto IsClearMask = [&ClearMask](ConstantSDNode *V) {

- if (!V || V->isNullValue()) {

+ if (!V || V->isZero()) {

ClearMask.push_back(true);

return true;

}

@@ -4054,9 +4056,7 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {

SDValue Op0 = Node->getOperand(0);

SDValue Op1 = Node->getOperand(1);

SDValue combined;

- for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),

- UE = Op0.getNode()->use_end(); UI != UE; ++UI) {

- SDNode *User = *UI;

+ for (SDNode *User : Op0.getNode()->uses()) {

if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||

User->use_empty())

continue;

@@ -4113,7 +4113,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {

// 0 / X -> 0

// 0 % X -> 0

ConstantSDNode *N0C = isConstOrConstSplat(N0);

- if (N0C && N0C->isNullValue())

+ if (N0C && N0C->isZero())

return N0;

// X / X -> 1

@@ -4138,21 +4138,20 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {

SDValue N1 = N->getOperand(1);

EVT VT = N->getValueType(0);

EVT CCVT = getSetCCResultType(VT);

+ SDLoc DL(N);

// fold vector ops

if (VT.isVector())

- if (SDValue FoldedVOp = SimplifyVBinOp(N))

+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))

return FoldedVOp;

- SDLoc DL(N);

// fold (sdiv c1, c2) -> c1/c2

ConstantSDNode *N1C = isConstOrConstSplat(N1);

if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))

return C;

// fold (sdiv X, -1) -> 0-X

- if (N1C && N1C->isAllOnesValue())

+ if (N1C && N1C->isAllOnes())

return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);

// fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)

@@ -4206,11 +4205,11 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {

// Helper for determining whether a value is a power-2 constant scalar or a

// vector of such elements.

auto IsPowerOfTwo = [](ConstantSDNode *C) {

- if (C->isNullValue() || C->isOpaque())

+ if (C->isZero() || C->isOpaque())

return false;

if (C->getAPIntValue().isPowerOf2())

return true;

- if ((-C->getAPIntValue()).isPowerOf2())

+ if (C->getAPIntValue().isNegatedPowerOf2())

return true;

return false;

};

@@ -4283,21 +4282,20 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {

SDValue N1 = N->getOperand(1);

EVT VT = N->getValueType(0);

EVT CCVT = getSetCCResultType(VT);

+ SDLoc DL(N);

// fold vector ops

if (VT.isVector())

- if (SDValue FoldedVOp = SimplifyVBinOp(N))

+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))

return FoldedVOp;

- SDLoc DL(N);

// fold (udiv c1, c2) -> c1/c2

ConstantSDNode *N1C = isConstOrConstSplat(N1);

if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))

return C;

// fold (udiv X, -1) -> select(X == -1, 1, 0)

- if (N1C && N1C->getAPIntValue().isAllOnesValue())

+ if (N1C && N1C->isAllOnes())

return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),

DAG.getConstant(1, DL, VT),

DAG.getConstant(0, DL, VT));

@@ -4393,7 +4391,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) {

return C;

// fold (urem X, -1) -> select(X == -1, 0, x)

- if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())

+ if (!isSigned && N1C && N1C->isAllOnes())

return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),

DAG.getConstant(0, DL, VT), N0);

@@ -4477,6 +4475,11 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {

if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))

return C;

+ // canonicalize constant to RHS.

+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&

+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))

+ return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);

// fold (mulhs x, 0) -> 0

if (isNullConstant(N1))

return N1;

@@ -4529,6 +4532,11 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {

if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))

return C;

+ // canonicalize constant to RHS.

+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&

+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))

+ return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);

// fold (mulhu x, 0) -> 0

if (isNullConstant(N1))

return N1;

@@ -4569,6 +4577,12 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {

}

+ // Simplify the operands using demanded-bits information.

+ // We don't have demanded bits support for MULHU so this just enables constant

+ // folding based on known bits.

+ if (SimplifyDemandedBits(SDValue(N, 0)))

+ return SDValue(N, 0);

return SDValue();

}

@@ -4770,20 +4784,21 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {

SDValue N1 = N->getOperand(1);

EVT VT = N0.getValueType();

unsigned Opcode = N->getOpcode();

+ SDLoc DL(N);

// fold vector ops

if (VT.isVector())

- if (SDValue FoldedVOp = SimplifyVBinOp(N))

+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))

return FoldedVOp;

// fold operation with constant operands.

- if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))

+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))

return C;

// canonicalize constant to RHS

if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&

!DAG.isConstantIntBuildVectorOrConstantInt(N1))

- return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);

+ return DAG.getNode(N->getOpcode(), DL, VT, N1, N0);

// Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.

// Only do this if the current op isn't legal and the flipped is.

@@ -4799,7 +4814,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {

default: llvm_unreachable("Unknown MINMAX opcode");

}

if (TLI.isOperationLegal(AltOpcode, VT))

- return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);

+ return DAG.getNode(AltOpcode, DL, VT, N0, N1);

}

// Simplify the operands using demanded-bits information.

@@ -5135,8 +5150,9 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {

if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))

return V;

+ // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.

if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&

- VT.getSizeInBits() <= 64) {

+ VT.getSizeInBits() <= 64 && N0->hasOneUse()) {

if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {

if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {

// Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal

@@ -5608,6 +5624,39 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {

return DAG.getZExtOrTrunc(Setcc, DL, VT);

}

+/// For targets that support usubsat, match a bit-hack form of that operation

+/// that ends in 'and' and convert it.

+static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {

+ SDValue N0 = N->getOperand(0);

+ SDValue N1 = N->getOperand(1);

+ EVT VT = N1.getValueType();

+ // Canonicalize SRA as operand 1.

+ if (N0.getOpcode() == ISD::SRA)

+ std::swap(N0, N1);

+ // xor/add with SMIN (signmask) are logically equivalent.

+ if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD)

+ return SDValue();

+ if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() ||

+ N0.getOperand(0) != N1.getOperand(0))

+ return SDValue();

+ unsigned BitWidth = VT.getScalarSizeInBits();

+ ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true);

+ ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);

+ if (!XorC || !XorC->getAPIntValue().isSignMask() ||

+ !SraC || SraC->getAPIntValue() != BitWidth - 1)

+ return SDValue();

+ // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128

+ // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128

+ SDLoc DL(N);

+ SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);

+ return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);

+}

SDValue DAGCombiner::visitAND(SDNode *N) {

SDValue N0 = N->getOperand(0);

SDValue N1 = N->getOperand(1);

@@ -5619,17 +5668,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) {

// fold vector ops

if (VT.isVector()) {

- if (SDValue FoldedVOp = SimplifyVBinOp(N))

+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))

return FoldedVOp;

// fold (and x, 0) -> 0, vector edition

if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))

// do not return N0, because undef node may exist in N0

- return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),

+ return DAG.getConstant(APInt::getZero(N0.getScalarValueSizeInBits()),

SDLoc(N), N0.getValueType());

if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))

// do not return N1, because undef node may exist in N1

- return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),

+ return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),

SDLoc(N), N1.getValueType());

// fold (and x, -1) -> x, vector edition

@@ -5680,8 +5729,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {

// if (and x, c) is known to be zero, return 0

unsigned BitWidth = VT.getScalarSizeInBits();

- if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),

- APInt::getAllOnesValue(BitWidth)))

+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))

return DAG.getConstant(0, SDLoc(N), VT);

if (SDValue NewSel = foldBinOpIntoSelect(N))

@@ -5743,7 +5791,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {

// Get the constant (if applicable) the zero'th operand is being ANDed with.

// This can be a pure constant or a vector splat, in which case we treat the

// vector as a scalar and use the splat value.

- APInt Constant = APInt::getNullValue(1);

+ APInt Constant = APInt::getZero(1);

if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {

Constant = C->getAPIntValue();

} else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {

@@ -5774,7 +5822,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {

// Make sure that variable 'Constant' is only set if 'SplatBitSize' is a

// multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.

if ((SplatBitSize % EltBitWidth) == 0) {

- Constant = APInt::getAllOnesValue(EltBitWidth);

+ Constant = APInt::getAllOnes(EltBitWidth);

for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)

Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);

}

@@ -5801,7 +5849,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {

case ISD::NON_EXTLOAD: B = true; break;

}

- if (B && Constant.isAllOnesValue()) {

+ if (B && Constant.isAllOnes()) {

// If the load type was an EXTLOAD, convert to ZEXTLOAD in order to

// preserve semantics once we get rid of the AND.

SDValue NewLoad(Load, 0);

@@ -5971,6 +6019,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {

if (IsAndZeroExtMask(N0, N1))

return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));

+ if (hasOperation(ISD::USUBSAT, VT))

+ if (SDValue V = foldAndToUsubsat(N, DAG))

+ return V;

return SDValue();

}

@@ -6385,7 +6437,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {

// fold vector ops

if (VT.isVector()) {

- if (SDValue FoldedVOp = SimplifyVBinOp(N))

+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))

return FoldedVOp;

// fold (or x, 0) -> x, vector edition

@@ -6926,17 +6978,16 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,

// a rot[lr]. This also matches funnel shift patterns, similar to rotation but

// with different shifted sources.

SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {

- // Must be a legal type. Expanded 'n promoted things won't work with rotates.

EVT VT = LHS.getValueType();

- if (!TLI.isTypeLegal(VT))

- return SDValue();

// The target must have at least one rotate/funnel flavor.

+ // We still try to match rotate by constant pre-legalization.

+ // TODO: Support pre-legalization funnel-shift by constant.

bool HasROTL = hasOperation(ISD::ROTL, VT);

bool HasROTR = hasOperation(ISD::ROTR, VT);

bool HasFSHL = hasOperation(ISD::FSHL, VT);

bool HasFSHR = hasOperation(ISD::FSHR, VT);

- if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)

+ if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)

return SDValue();

// Check for truncated rotate.

@@ -6989,6 +7040,7 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {

if (LHSShift.getOpcode() == RHSShift.getOpcode())

return SDValue(); // Shifts must disagree.

+ // TODO: Support pre-legalization funnel-shift by constant.

bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);

if (!IsRotate && !(HasFSHL || HasFSHR))

return SDValue(); // Requires funnel shift support.

@@ -7017,12 +7069,15 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {

};

if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {

SDValue Res;

- if (IsRotate && (HasROTL || HasROTR))

- Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,

- HasROTL ? LHSShiftAmt : RHSShiftAmt);

- else

- Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,

- RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);

+ if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {

+ bool UseROTL = !LegalOperations || HasROTL;

+ Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,

+ UseROTL ? LHSShiftAmt : RHSShiftAmt);

+ } else {

+ bool UseFSHL = !LegalOperations || HasFSHL;

+ Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,

+ RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);

+ }

// If there is an AND of either shifted operand, apply it to the result.

if (LHSMask.getNode() || RHSMask.getNode()) {

@@ -7046,6 +7101,11 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {

return Res;

}

+ // Even pre-legalization, we can't easily rotate/funnel-shift by a variable

+ // shift.

+ if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)

+ return SDValue();

// If there is a mask here, and we have a variable shift, we can't be sure

// that we're masking out the right stuff.

if (LHSMask.getNode() || RHSMask.getNode())

@@ -7297,7 +7357,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {

// TODO: If there is evidence that running this later would help, this

// limitation could be removed. Legality checks may need to be added

// for the created store and optional bswap/rotate.

- if (LegalOperations)

+ if (LegalOperations || OptLevel == CodeGenOpt::None)

return SDValue();

// We only handle merging simple stores of 1-4 bytes.

@@ -7672,9 +7732,12 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {

// | D |

// Into:

// (x & m) | (y & ~m)

-// If y is a constant, and the 'andn' does not work with immediates,

-// we unfold into a different pattern:

+// If y is a constant, m is not a 'not', and the 'andn' does not work with

+// immediates, we unfold into a different pattern:

// ~(~x & m) & (m | y)

+// If x is a constant, m is a 'not', and the 'andn' does not work with

+// immediates, we unfold into a different pattern:

+// (x | ~m) & ~(~m & ~y)

// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at

// the very least that breaks andnpd / andnps patterns, and because those

// patterns are simplified in IR and shouldn't be created in the DAG

@@ -7729,8 +7792,9 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {

SDLoc DL(N);

- // If Y is a constant, check that 'andn' works with immediates.

- if (!TLI.hasAndNot(Y)) {

+ // If Y is a constant, check that 'andn' works with immediates. Unless M is

+ // a bitwise not that would already allow ANDN to be used.

+ if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {

assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");

// If not, we need to do a bit more work to make sure andn is still used.

SDValue NotX = DAG.getNOT(DL, X, VT);

@@ -7740,6 +7804,19 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {

return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);

}

+ // If X is a constant and M is a bitwise not, check that 'andn' works with

+ // immediates.

+ if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {

+ assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");

+ // If not, we need to do a bit more work to make sure andn is still used.

+ SDValue NotM = M.getOperand(0);

+ SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);

+ SDValue NotY = DAG.getNOT(DL, Y, VT);

+ SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);

+ SDValue NotRHS = DAG.getNOT(DL, RHS, VT);

+ return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);

+ }

SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);

SDValue NotM = DAG.getNOT(DL, M, VT);

SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);

@@ -7751,10 +7828,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {

SDValue N0 = N->getOperand(0);

SDValue N1 = N->getOperand(1);

EVT VT = N0.getValueType();

+ SDLoc DL(N);

// fold vector ops

if (VT.isVector()) {

- if (SDValue FoldedVOp = SimplifyVBinOp(N))

+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))

return FoldedVOp;

// fold (xor x, 0) -> x, vector edition

@@ -7765,7 +7843,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {

}

// fold (xor undef, undef) -> 0. This is a common idiom (misuse).

- SDLoc DL(N);

if (N0.isUndef() && N1.isUndef())

return DAG.getConstant(0, DL, VT);

@@ -7900,7 +7977,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {

// shift has been simplified to undef.

uint64_t ShiftAmt = ShiftC->getLimitedValue();

if (ShiftAmt < BitWidth) {

- APInt Ones = APInt::getAllOnesValue(BitWidth);

+ APInt Ones = APInt::getAllOnes(BitWidth);

Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);

if (XorC->getAPIntValue() == Ones) {

// If the xor constant is a shifted -1, do a 'not' before the shift:

@@ -8223,7 +8300,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {

// fold vector ops

if (VT.isVector()) {

- if (SDValue FoldedVOp = SimplifyVBinOp(N))

+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))

return FoldedVOp;

BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);

@@ -8256,8 +8333,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {

return NewSel;

// if (shl x, c) is known to be zero, return 0

- if (DAG.MaskedValueIsZero(SDValue(N, 0),

- APInt::getAllOnesValue(OpSizeInBits)))

+ if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))

return DAG.getConstant(0, SDLoc(N), VT);

// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).

@@ -8502,28 +8578,43 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,

// Both operands must be equivalent extend nodes.

SDValue LeftOp = ShiftOperand.getOperand(0);

SDValue RightOp = ShiftOperand.getOperand(1);

bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;

bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;

- if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())

+ if (!IsSignExt && !IsZeroExt)

return SDValue();

- EVT WideVT1 = LeftOp.getValueType();

- EVT WideVT2 = RightOp.getValueType();

- (void)WideVT2;

+ EVT NarrowVT = LeftOp.getOperand(0).getValueType();

+ unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();

+ SDValue MulhRightOp;

+ if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {

+ unsigned ActiveBits = IsSignExt

+ ? Constant->getAPIntValue().getMinSignedBits()

+ : Constant->getAPIntValue().getActiveBits();

+ if (ActiveBits > NarrowVTSize)

+ return SDValue();

+ MulhRightOp = DAG.getConstant(

+ Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,

+ NarrowVT);

+ } else {

+ if (LeftOp.getOpcode() != RightOp.getOpcode())

+ return SDValue();

+ // Check that the two extend nodes are the same type.

+ if (NarrowVT != RightOp.getOperand(0).getValueType())

+ return SDValue();

+ MulhRightOp = RightOp.getOperand(0);

+ }

+ EVT WideVT = LeftOp.getValueType();

// Proceed with the transformation if the wide types match.

- assert((WideVT1 == WideVT2) &&

+ assert((WideVT == RightOp.getValueType()) &&

"Cannot have a multiply node with two different operand types.");

- EVT NarrowVT = LeftOp.getOperand(0).getValueType();

- // Check that the two extend nodes are the same type.

- if (NarrowVT != RightOp.getOperand(0).getValueType())

- return SDValue();

// Proceed with the transformation if the wide type is twice as large

// as the narrow type.

- unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();

- if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)

+ if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)

return SDValue();

// Check the shift amount with the narrow type size.

@@ -8541,10 +8632,10 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,

if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))

return SDValue();

- SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),

- RightOp.getOperand(0));

- return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)

- : DAG.getZExtOrTrunc(Result, DL, WideVT1));

+ SDValue Result =

+ DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);

+ return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT)

+ : DAG.getZExtOrTrunc(Result, DL, WideVT));

}

SDValue DAGCombiner::visitSRA(SDNode *N) {

@@ -8564,7 +8655,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {

// fold vector ops

if (VT.isVector())

- if (SDValue FoldedVOp = SimplifyVBinOp(N))

+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))

return FoldedVOp;

ConstantSDNode *N1C = isConstOrConstSplat(N1);

@@ -8762,7 +8853,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {

// fold vector ops

if (VT.isVector())

- if (SDValue FoldedVOp = SimplifyVBinOp(N))

+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))

return FoldedVOp;

ConstantSDNode *N1C = isConstOrConstSplat(N1);

@@ -8775,8 +8866,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {

return NewSel;

// if (srl x, c) is known to be zero, return 0

- if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),

- APInt::getAllOnesValue(OpSizeInBits)))

+ if (N1C &&

+ DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))

return DAG.getConstant(0, SDLoc(N), VT);

// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))

@@ -9358,27 +9449,27 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {

// is also a target-independent combine here in DAGCombiner in the other

// direction for (select Cond, -1, 0) when the condition is not i1.

if (CondVT == MVT::i1 && !LegalOperations) {

- if (C1->isNullValue() && C2->isOne()) {

+ if (C1->isZero() && C2->isOne()) {

// select Cond, 0, 1 --> zext (!Cond)

SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);

if (VT != MVT::i1)

NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);

return NotCond;

}

- if (C1->isNullValue() && C2->isAllOnesValue()) {

+ if (C1->isZero() && C2->isAllOnes()) {

// select Cond, 0, -1 --> sext (!Cond)

SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);

if (VT != MVT::i1)

NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);

return NotCond;

}

- if (C1->isOne() && C2->isNullValue()) {

+ if (C1->isOne() && C2->isZero()) {

// select Cond, 1, 0 --> zext (Cond)

if (VT != MVT::i1)

Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);

return Cond;

}

- if (C1->isAllOnesValue() && C2->isNullValue()) {

+ if (C1->isAllOnes() && C2->isZero()) {

// select Cond, -1, 0 --> sext (Cond)

if (VT != MVT::i1)

Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);

@@ -9406,7 +9497,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {

}

// select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)

- if (C1Val.isPowerOf2() && C2Val.isNullValue()) {

+ if (C1Val.isPowerOf2() && C2Val.isZero()) {

if (VT != MVT::i1)

Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);

SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);

@@ -9434,7 +9525,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {

TargetLowering::ZeroOrOneBooleanContent &&

TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==

TargetLowering::ZeroOrOneBooleanContent &&

- C1->isNullValue() && C2->isOne()) {

+ C1->isZero() && C2->isOne()) {

SDValue NotCond =

DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));

if (VT.bitsEq(CondVT))

@@ -9479,6 +9570,64 @@ static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {

return SDValue();

}

+static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {

+ SDValue N0 = N->getOperand(0);

+ SDValue N1 = N->getOperand(1);

+ SDValue N2 = N->getOperand(2);

+ EVT VT = N->getValueType(0);

+ if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())

+ return SDValue();

+ SDValue Cond0 = N0.getOperand(0);

+ SDValue Cond1 = N0.getOperand(1);

+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

+ if (VT != Cond0.getValueType())

+ return SDValue();

+ // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the

+ // compare is inverted from that pattern ("Cond0 s> -1").

+ if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))

+ ; // This is the pattern we are looking for.

+ else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))

+ std::swap(N1, N2);

+ else

+ return SDValue();

+ // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1

+ if (isNullOrNullSplat(N2)) {

+ SDLoc DL(N);

+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);

+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);

+ return DAG.getNode(ISD::AND, DL, VT, Sra, N1);

+ }

+ // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2

+ if (isAllOnesOrAllOnesSplat(N1)) {

+ SDLoc DL(N);

+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);

+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);

+ return DAG.getNode(ISD::OR, DL, VT, Sra, N2);

+ }

+ // If we have to invert the sign bit mask, only do that transform if the

+ // target has a bitwise 'and not' instruction (the invert is free).

+ // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2

+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();

+ if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {

+ SDLoc DL(N);

+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);

+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);

+ SDValue Not = DAG.getNOT(DL, Sra, VT);

+ return DAG.getNode(ISD::AND, DL, VT, Not, N2);

+ }

+ // TODO: There's another pattern in this family, but it may require

+ // implementing hasOrNot() to check for profitability:

+ // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2

+ return SDValue();

SDValue DAGCombiner::visitSELECT(SDNode *N) {

SDValue N0 = N->getOperand(0);

SDValue N1 = N->getOperand(1);

@@ -9703,8 +9852,8 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {

"same value. This should have been addressed before this function.");

return DAG.getNode(

ISD::CONCAT_VECTORS, DL, VT,

- BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),

- TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));

+ BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),

+ TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));

}

bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {

@@ -10169,6 +10318,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {

if (SDValue V = foldVSelectOfConstants(N))

return V;

+ if (hasOperation(ISD::SRA, VT))

+ if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))

+ return V;

return SDValue();

}

@@ -10190,7 +10343,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {

AddToWorklist(SCC.getNode());

if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {

- if (!SCCC->isNullValue())

+ if (!SCCC->isZero())

return N2; // cond always true -> true val

else

return N3; // cond always false -> false val

@@ -10248,13 +10401,13 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {

// Is 'X Cond C' always true or false?

auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {

- bool False = (Cond == ISD::SETULT && C->isNullValue()) ||

+ bool False = (Cond == ISD::SETULT && C->isZero()) ||

(Cond == ISD::SETLT && C->isMinSignedValue()) ||

- (Cond == ISD::SETUGT && C->isAllOnesValue()) ||

+ (Cond == ISD::SETUGT && C->isAllOnes()) ||

(Cond == ISD::SETGT && C->isMaxSignedValue());

- bool True = (Cond == ISD::SETULE && C->isAllOnesValue()) ||

+ bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||

(Cond == ISD::SETLE && C->isMaxSignedValue()) ||

- (Cond == ISD::SETUGE && C->isNullValue()) ||

+ (Cond == ISD::SETUGE && C->isZero()) ||

(Cond == ISD::SETGE && C->isMinSignedValue());

return True || False;

};

@@ -10863,7 +11016,7 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,

if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)

return SDValue();

- if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))

+ if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))

return SDValue();

if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))

@@ -11257,7 +11410,7 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,

Known = DAG.computeKnownBits(Op);

- return (Known.Zero | 1).isAllOnesValue();

+ return (Known.Zero | 1).isAllOnes();

}

/// Given an extending node with a pop-count operand, if the target does not

@@ -12016,7 +12169,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {

return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

// If the input is already sign extended, just drop the extension.

- if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))

+ if (ExtVTBits >= DAG.ComputeMinSignedBits(N0))

return N0;

// fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2

@@ -12032,8 +12185,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {

if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {

SDValue N00 = N0.getOperand(0);

unsigned N00Bits = N00.getScalarValueSizeInBits();

- if ((N00Bits <= ExtVTBits ||

- (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&

+ if ((N00Bits <= ExtVTBits || DAG.ComputeMinSignedBits(N00) <= ExtVTBits) &&

(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))

return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);

}

@@ -12052,8 +12204,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {

APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);

if ((N00Bits == ExtVTBits ||

(!IsZext && (N00Bits < ExtVTBits ||

- (N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) <

- ExtVTBits))) &&

+ DAG.ComputeMinSignedBits(N00) <= ExtVTBits))) &&

(!LegalOperations ||

TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))

return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);

@@ -12290,7 +12441,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {

SDValue Amt = N0.getOperand(1);

KnownBits Known = DAG.computeKnownBits(Amt);

unsigned Size = VT.getScalarSizeInBits();

- if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {

+ if (Known.countMaxActiveBits() <= Log2_32(Size)) {

SDLoc SL(N);

EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());

@@ -12538,8 +12689,8 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) {

SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {

assert(N->getOpcode() == ISD::BUILD_PAIR);

- LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));

- LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));

+ auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));

+ auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));

// A BUILD_PAIR is always having the least significant part in elt 0 and the

// most significant part in elt 1. So when combining into one large load, we

@@ -12547,22 +12698,20 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {

if (DAG.getDataLayout().isBigEndian())

std::swap(LD1, LD2);

- if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||

+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||

+ !LD1->hasOneUse() || !LD2->hasOneUse() ||

LD1->getAddressSpace() != LD2->getAddressSpace())

return SDValue();

+ bool LD1Fast = false;

EVT LD1VT = LD1->getValueType(0);

unsigned LD1Bytes = LD1VT.getStoreSize();

- if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&

- DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {

- Align Alignment = LD1->getAlign();

- Align NewAlign = DAG.getDataLayout().getABITypeAlign(

- VT.getTypeForEVT(*DAG.getContext()));

- if (NewAlign <= Alignment &&

- (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))

- return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),

- LD1->getPointerInfo(), Alignment);

- }

+ if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&

+ DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&

+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,

+ *LD1->getMemOperand(), &LD1Fast) && LD1Fast)

+ return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),

+ LD1->getPointerInfo(), LD1->getAlign());

return SDValue();

}

@@ -12938,69 +13087,45 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {

return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);

}

- SDLoc DL(BV);

// Okay, we know the src/dst types are both integers of differing types.

- // Handling growing first.

assert(SrcEltVT.isInteger() && DstEltVT.isInteger());

- if (SrcBitSize < DstBitSize) {

- unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

- SmallVector<SDValue, 8> Ops;

- for (unsigned i = 0, e = BV->getNumOperands(); i != e;

- i += NumInputsPerOutput) {

- bool isLE = DAG.getDataLayout().isLittleEndian();

- APInt NewBits = APInt(DstBitSize, 0);

- bool EltIsUndef = true;

- for (unsigned j = 0; j != NumInputsPerOutput; ++j) {

- // Shift the previously computed bits over.

- NewBits <<= SrcBitSize;

- SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));

- if (Op.isUndef()) continue;

- EltIsUndef = false;

- NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().

- zextOrTrunc(SrcBitSize).zext(DstBitSize);

- }

- if (EltIsUndef)

- Ops.push_back(DAG.getUNDEF(DstEltVT));

- else

- Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));

- }

+ // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a

+ // BuildVectorSDNode?

+ auto *BVN = cast<BuildVectorSDNode>(BV);

- EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());

- return DAG.getBuildVector(VT, DL, Ops);

- }

+ // Extract the constant raw bit data.

+ BitVector UndefElements;

+ SmallVector<APInt> RawBits;

+ bool IsLE = DAG.getDataLayout().isLittleEndian();

+ if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))

+ return SDValue();

- // Finally, this must be the case where we are shrinking elements: each input

- // turns into multiple outputs.

- unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;

- EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,

- NumOutputsPerInput*BV->getNumOperands());

+ SDLoc DL(BV);

SmallVector<SDValue, 8> Ops;

+ for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {

+ if (UndefElements[I])

+ Ops.push_back(DAG.getUNDEF(DstEltVT));

+ else

+ Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));

+ }

- for (const SDValue &Op : BV->op_values()) {

- if (Op.isUndef()) {

- Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));

- continue;

- }

- APInt OpVal = cast<ConstantSDNode>(Op)->

- getAPIntValue().zextOrTrunc(SrcBitSize);

+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());

+ return DAG.getBuildVector(VT, DL, Ops);

- for (unsigned j = 0; j != NumOutputsPerInput; ++j) {

- APInt ThisVal = OpVal.trunc(DstBitSize);

- Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));

- OpVal.lshrInPlace(DstBitSize);

- }

+// Returns true if floating point contraction is allowed on the FMUL-SDValue

+// `N`

+static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {

+ assert(N.getOpcode() == ISD::FMUL);

- // For big endian targets, swap the order of the pieces of each element.

- if (DAG.getDataLayout().isBigEndian())

- std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());

- }

+ return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||

+ N->getFlags().hasAllowContract();

- return DAG.getBuildVector(VT, DL, Ops);

+// Returns true if `N` can assume no infinities involved in its computation.

+static bool hasNoInfs(const TargetOptions &Options, SDValue N) {

+ return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs();

}

/// Try to perform FMA combining on a given FADD node.

@@ -13039,6 +13164,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {

unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;

bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

+ auto isFusedOp = [&](SDValue N) {

+ unsigned Opcode = N.getOpcode();

+ return Opcode == ISD::FMA || Opcode == ISD::FMAD;

+ };

// Is the node an FMUL and contractable either due to global flags or

// SDNodeFlags.

auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {

@@ -13070,12 +13200,12 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {

// fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)

// This requires reassociation because it changes the order of operations.

SDValue FMA, E;

- if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&

+ if (CanReassociate && isFusedOp(N0) &&

N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&

N0.getOperand(2).hasOneUse()) {

FMA = N0;

E = N1;

- } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&

+ } else if (CanReassociate && isFusedOp(N1) &&

N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&

N1.getOperand(2).hasOneUse()) {

FMA = N1;

@@ -13131,7 +13261,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {

DAG.getNode(ISD::FP_EXTEND, SL, VT, V),

Z));

};

- if (N0.getOpcode() == PreferredFusedOpcode) {

+ if (isFusedOp(N0)) {

SDValue N02 = N0.getOperand(2);

if (N02.getOpcode() == ISD::FP_EXTEND) {

SDValue N020 = N02.getOperand(0);

@@ -13161,7 +13291,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {

};

if (N0.getOpcode() == ISD::FP_EXTEND) {

SDValue N00 = N0.getOperand(0);

- if (N00.getOpcode() == PreferredFusedOpcode) {

+ if (isFusedOp(N00)) {

SDValue N002 = N00.getOperand(2);

if (isContractableFMUL(N002) &&

TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,

@@ -13175,7 +13305,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {

// fold (fadd x, (fma y, z, (fpext (fmul u, v))))

// -> (fma y, z, (fma (fpext u), (fpext v), x))

- if (N1.getOpcode() == PreferredFusedOpcode) {

+ if (isFusedOp(N1)) {

SDValue N12 = N1.getOperand(2);

if (N12.getOpcode() == ISD::FP_EXTEND) {

SDValue N120 = N12.getOperand(0);

@@ -13196,7 +13326,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {

// interesting for all targets, especially GPUs.

if (N1.getOpcode() == ISD::FP_EXTEND) {

SDValue N10 = N1.getOperand(0);

- if (N10.getOpcode() == PreferredFusedOpcode) {

+ if (isFusedOp(N10)) {

SDValue N102 = N10.getOperand(2);

if (isContractableFMUL(N102) &&

TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,

@@ -13392,12 +13522,17 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {

return isContractableFMUL(N) && isReassociable(N.getNode());

};

+ auto isFusedOp = [&](SDValue N) {

+ unsigned Opcode = N.getOpcode();

+ return Opcode == ISD::FMA || Opcode == ISD::FMAD;

+ };

// More folding opportunities when target permits.

if (Aggressive && isReassociable(N)) {

bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();

// fold (fsub (fma x, y, (fmul u, v)), z)

// -> (fma x, y, (fma u, v, (fneg z)))

- if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&

+ if (CanFuse && isFusedOp(N0) &&

isContractableAndReassociableFMUL(N0.getOperand(2)) &&

N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {

return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),

@@ -13410,7 +13545,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {

// fold (fsub x, (fma y, z, (fmul u, v)))

// -> (fma (fneg y), z, (fma (fneg u), v, x))

- if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&

+ if (CanFuse && isFusedOp(N1) &&

isContractableAndReassociableFMUL(N1.getOperand(2)) &&

N1->hasOneUse() && NoSignedZero) {

SDValue N20 = N1.getOperand(2).getOperand(0);

@@ -13424,8 +13559,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {

// fold (fsub (fma x, y, (fpext (fmul u, v))), z)

// -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))

- if (N0.getOpcode() == PreferredFusedOpcode &&

- N0->hasOneUse()) {

+ if (isFusedOp(N0) && N0->hasOneUse()) {

SDValue N02 = N0.getOperand(2);

if (N02.getOpcode() == ISD::FP_EXTEND) {

SDValue N020 = N02.getOperand(0);

@@ -13451,7 +13585,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {

// interesting for all targets, especially GPUs.

if (N0.getOpcode() == ISD::FP_EXTEND) {

SDValue N00 = N0.getOperand(0);

- if (N00.getOpcode() == PreferredFusedOpcode) {

+ if (isFusedOp(N00)) {

SDValue N002 = N00.getOperand(2);

if (isContractableAndReassociableFMUL(N002) &&

TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,

@@ -13471,8 +13605,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {

// fold (fsub x, (fma y, z, (fpext (fmul u, v))))

// -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))

- if (N1.getOpcode() == PreferredFusedOpcode &&

- N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&

+ if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&

N1->hasOneUse()) {

SDValue N120 = N1.getOperand(2).getOperand(0);

if (isContractableAndReassociableFMUL(N120) &&

@@ -13496,8 +13629,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {

// FIXME: This turns two single-precision and one double-precision

// operation into two double-precision operations, which might not be

// interesting for all targets, especially GPUs.

- if (N1.getOpcode() == ISD::FP_EXTEND &&

- N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {

+ if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {

SDValue CvtSrc = N1.getOperand(0);

SDValue N100 = CvtSrc.getOperand(0);

SDValue N101 = CvtSrc.getOperand(1);

@@ -13538,12 +13670,13 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {

// The transforms below are incorrect when x == 0 and y == inf, because the

// intermediate multiplication produces a nan.

- if (!Options.NoInfsFPMath)

+ SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;

+ if (!hasNoInfs(Options, FAdd))

return SDValue();

// Floating-point multiply-add without intermediate rounding.

bool HasFMA =

- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&

+ isContractableFMUL(Options, SDValue(N, 0)) &&

TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&

(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

@@ -13633,7 +13766,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {

// fold vector ops

if (VT.isVector())

- if (SDValue FoldedVOp = SimplifyVBinOp(N))

+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))

return FoldedVOp;

// fold (fadd c1, c2) -> c1 + c2

@@ -13841,7 +13974,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {

// fold vector ops

if (VT.isVector())

- if (SDValue FoldedVOp = SimplifyVBinOp(N))

+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))

return FoldedVOp;

// fold (fsub c1, c2) -> c1-c2

@@ -13926,7 +14059,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {

// fold vector ops

if (VT.isVector()) {

// This just handles C1 * C2 for vectors. Other vector folds are below.

- if (SDValue FoldedVOp = SimplifyVBinOp(N))

+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))

return FoldedVOp;

}

@@ -13971,10 +14104,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {

if (N1CFP && N1CFP->isExactlyValue(+2.0))

return DAG.getNode(ISD::FADD, DL, VT, N0, N0);

- // fold (fmul X, -1.0) -> (fneg X)

- if (N1CFP && N1CFP->isExactlyValue(-1.0))

- if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))

- return DAG.getNode(ISD::FNEG, DL, VT, N0);

+ // fold (fmul X, -1.0) -> (fsub -0.0, X)

+ if (N1CFP && N1CFP->isExactlyValue(-1.0)) {

+ if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {

+ return DAG.getNode(ISD::FSUB, DL, VT,

+ DAG.getConstantFP(-0.0, DL, VT), N0, Flags);

+ }

// -N0 * -N1 --> N0 * N1

TargetLowering::NegatibleCost CostN0 =

@@ -14260,7 +14396,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {

// fold vector ops

if (VT.isVector())

- if (SDValue FoldedVOp = SimplifyVBinOp(N))

+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))

return FoldedVOp;

// fold (fdiv c1, c2) -> c1/c2

@@ -16245,11 +16381,12 @@ struct LoadedSlice {

return false;

// Check if it will be merged with the load.

- // 1. Check the alignment constraint.

- Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(

- ResVT.getTypeForEVT(*DAG->getContext()));

- if (RequiredAlignment > getAlign())

+ // 1. Check the alignment / fast memory access constraint.

+ bool IsFast = false;

+ if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,

+ Origin->getAddressSpace(), getAlign(),

+ Origin->getMemOperand()->getFlags(), &IsFast) ||

+ !IsFast)

return false;

// 2. Check that the load is a legal operation for that type.

@@ -16270,7 +16407,7 @@ struct LoadedSlice {

/// \p UsedBits looks like 0..0 1..1 0..0.

static bool areUsedBitsDense(const APInt &UsedBits) {

// If all the bits are one, this is dense!

- if (UsedBits.isAllOnesValue())

+ if (UsedBits.isAllOnes())

return true;

// Get rid of the unused bits on the right.

@@ -16279,7 +16416,7 @@ static bool areUsedBitsDense(const APInt &UsedBits) {

if (NarrowedUsedBits.countLeadingZeros())

NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());

// Check that the chunk of bits is completely used.

- return NarrowedUsedBits.isAllOnesValue();

+ return NarrowedUsedBits.isAllOnes();

}

/// Check whether or not \p First and \p Second are next to each other

@@ -16697,8 +16834,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {

unsigned BitWidth = N1.getValueSizeInBits();

APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();

if (Opc == ISD::AND)

- Imm ^= APInt::getAllOnesValue(BitWidth);

- if (Imm == 0 || Imm.isAllOnesValue())

+ Imm ^= APInt::getAllOnes(BitWidth);

+ if (Imm == 0 || Imm.isAllOnes())

return SDValue();

unsigned ShAmt = Imm.countTrailingZeros();

unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;

@@ -16725,16 +16862,19 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {

if ((Imm & Mask) == Imm) {

APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);

if (Opc == ISD::AND)

- NewImm ^= APInt::getAllOnesValue(NewBW);

+ NewImm ^= APInt::getAllOnes(NewBW);

uint64_t PtrOff = ShAmt / 8;

// For big endian targets, we need to adjust the offset to the pointer to

// load the correct bytes.

if (DAG.getDataLayout().isBigEndian())

PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

+ bool IsFast = false;

Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);

- Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());

- if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))

+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,

+ LD->getAddressSpace(), NewAlign,

+ LD->getMemOperand()->getFlags(), &IsFast) ||

+ !IsFast)

return SDValue();

SDValue NewPtr =

@@ -16788,27 +16928,26 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {

if (VTSize.isScalable())

return SDValue();

+ bool FastLD = false, FastST = false;

EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());

if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||

!TLI.isOperationLegal(ISD::STORE, IntVT) ||

!TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||

- !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))

- return SDValue();

- Align LDAlign = LD->getAlign();

- Align STAlign = ST->getAlign();

- Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());

- Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);

- if (LDAlign < ABIAlign || STAlign < ABIAlign)

+ !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||

+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,

+ *LD->getMemOperand(), &FastLD) ||

+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,

+ *ST->getMemOperand(), &FastST) ||

+ !FastLD || !FastST)

return SDValue();

SDValue NewLD =

DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),

- LD->getPointerInfo(), LDAlign);

+ LD->getPointerInfo(), LD->getAlign());

SDValue NewST =

DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),

- ST->getPointerInfo(), STAlign);

+ ST->getPointerInfo(), ST->getAlign());

AddToWorklist(NewLD.getNode());

AddToWorklist(NewST.getNode());

@@ -16839,8 +16978,10 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,

SDValue &ConstNode) {

APInt Val;

- // If the add only has one use, this would be OK to do.

- if (AddNode.getNode()->hasOneUse())

+ // If the add only has one use, and the target thinks the folding is

+ // profitable or does not lead to worse code, this would be OK to do.

+ if (AddNode.getNode()->hasOneUse() &&

+ TLI.isMulAddWithConstProfitable(AddNode, ConstNode))

return true;

// Walk all the users of the constant with which we're multiplying.

@@ -16932,6 +17073,22 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(

unsigned SizeInBits = NumStores * ElementSizeBits;

unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;

+ Optional<MachineMemOperand::Flags> Flags;

+ AAMDNodes AAInfo;

+ for (unsigned I = 0; I != NumStores; ++I) {

+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);

+ if (!Flags) {

+ Flags = St->getMemOperand()->getFlags();

+ AAInfo = St->getAAInfo();

+ continue;

+ }

+ // Skip merging if there's an inconsistent flag.

+ if (Flags != St->getMemOperand()->getFlags())

+ return false;

+ // Concatenate AA metadata.

+ AAInfo = AAInfo.concat(St->getAAInfo());

+ }

EVT StoreTy;

if (UseVector) {

unsigned Elts = NumStores * NumMemElts;

@@ -17049,9 +17206,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(

// make sure we use trunc store if it's necessary to be legal.

SDValue NewStore;

if (!UseTrunc) {

- NewStore =

- DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),

- FirstInChain->getPointerInfo(), FirstInChain->getAlign());

+ NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),

+ FirstInChain->getPointerInfo(),

+ FirstInChain->getAlign(), Flags.getValue(), AAInfo);

} else { // Must be realized as a trunc store

EVT LegalizedStoredValTy =

TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());

@@ -17063,7 +17220,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(

NewStore = DAG.getTruncStore(

NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),

FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,

- FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());

+ FirstInChain->getAlign(), Flags.getValue(), AAInfo);

}

// Replace all merged stores with the new store.

@@ -17360,7 +17517,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(

SDValue StoredVal = ST->getValue();

bool IsElementZero = false;

if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))

- IsElementZero = C->isNullValue();

+ IsElementZero = C->isZero();

else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))

IsElementZero = C->getConstantFPValue()->isNullValue();

if (IsElementZero) {

@@ -17379,7 +17536,8 @@ bool DAGCombiner::tryStoreMergeOfConstants(

break;

if (TLI.isTypeLegal(StoreTy) &&

- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&

+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy,

+ DAG.getMachineFunction()) &&

TLI.allowsMemoryAccess(Context, DL, StoreTy,

*FirstInChain->getMemOperand(), &IsFast) &&

IsFast) {

@@ -17391,7 +17549,8 @@ bool DAGCombiner::tryStoreMergeOfConstants(

EVT LegalizedStoredValTy =

TLI.getTypeToTransformTo(Context, StoredVal.getValueType());

if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&

- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&

+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,

+ DAG.getMachineFunction()) &&

TLI.allowsMemoryAccess(Context, DL, StoreTy,

*FirstInChain->getMemOperand(), &IsFast) &&

IsFast) {

@@ -17410,7 +17569,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(

unsigned Elts = (i + 1) * NumMemElts;

EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);

if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&

- TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&

+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&

TLI.allowsMemoryAccess(Context, DL, Ty,

*FirstInChain->getMemOperand(), &IsFast) &&

IsFast)

@@ -17486,7 +17645,8 @@ bool DAGCombiner::tryStoreMergeOfExtracts(

if (Ty.getSizeInBits() > MaximumLegalStoreInBits)

break;

- if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&

+ if (TLI.isTypeLegal(Ty) &&

+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&

TLI.allowsMemoryAccess(Context, DL, Ty,

*FirstInChain->getMemOperand(), &IsFast) &&

IsFast)

@@ -17634,8 +17794,13 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,

bool IsFastSt = false;

bool IsFastLd = false;

- if (TLI.isTypeLegal(StoreTy) &&

- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&

+ // Don't try vector types if we need a rotate. We may still fail the

+ // legality checks for the integer type, but we can't handle the rotate

+ // case with vectors.

+ // FIXME: We could use a shuffle in place of the rotate.

+ if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&

+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy,

+ DAG.getMachineFunction()) &&

TLI.allowsMemoryAccess(Context, DL, StoreTy,

*FirstInChain->getMemOperand(), &IsFastSt) &&

IsFastSt &&

@@ -17649,7 +17814,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,

unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;

StoreTy = EVT::getIntegerVT(Context, SizeInBits);

if (TLI.isTypeLegal(StoreTy) &&

- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&

+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy,

+ DAG.getMachineFunction()) &&

TLI.allowsMemoryAccess(Context, DL, StoreTy,

*FirstInChain->getMemOperand(), &IsFastSt) &&

IsFastSt &&

@@ -17663,7 +17829,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,

TargetLowering::TypePromoteInteger) {

EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);

if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&

- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&

+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,

+ DAG.getMachineFunction()) &&

TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&

TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&

TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&

@@ -18215,7 +18382,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {

case ISD::LIFETIME_END:

// We can forward past any lifetime start/end that can be proven not to

// alias the node.

- if (!isAlias(Chain.getNode(), N))

+ if (!mayAlias(Chain.getNode(), N))

Chains.push_back(Chain.getOperand(0));

break;

case ISD::STORE: {

@@ -18593,32 +18760,35 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,

if (!VecEltVT.isByteSized())

return SDValue();

- Align Alignment = OriginalLoad->getAlign();

- Align NewAlign = DAG.getDataLayout().getABITypeAlign(

- VecEltVT.getTypeForEVT(*DAG.getContext()));

- if (NewAlign > Alignment ||

- !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))

- return SDValue();

- ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?

- ISD::NON_EXTLOAD : ISD::EXTLOAD;

- if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))

+ ISD::LoadExtType ExtTy =

+ ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;

+ if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||

+ !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))

return SDValue();

- Alignment = NewAlign;

+ Align Alignment = OriginalLoad->getAlign();

MachinePointerInfo MPI;

SDLoc DL(EVE);

if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {

int Elt = ConstEltNo->getZExtValue();

unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;

MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);

+ Alignment = commonAlignment(Alignment, PtrOff);

} else {

// Discard the pointer info except the address space because the memory

// operand can't represent this new access since the offset is variable.

MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());

+ Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);

}

+ bool IsFast = false;

+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,

+ OriginalLoad->getAddressSpace(), Alignment,

+ OriginalLoad->getMemOperand()->getFlags(),

+ &IsFast) ||

+ !IsFast)

+ return SDValue();

SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),

InVecVT, EltNo);

@@ -18864,7 +19034,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {

Use->getOperand(0) == VecOp &&

isa<ConstantSDNode>(Use->getOperand(1));

})) {

- APInt DemandedElts = APInt::getNullValue(NumElts);

+ APInt DemandedElts = APInt::getZero(NumElts);

for (SDNode *Use : VecOp->uses()) {

auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));

if (CstElt->getAPIntValue().ult(NumElts))

@@ -18877,7 +19047,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {

AddToWorklist(N);

return SDValue(N, 0);

}

- APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);

+ APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);

if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {

// We simplified the vector operand of this extract element. If this

// extract is not dead, visit it again so it is folded properly.

@@ -19672,8 +19842,10 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {

// Make sure the first element matches

// (zext (extract_vector_elt X, C))

+ // Offset must be a constant multiple of the

+ // known-minimum vector length of the result type.

int64_t Offset = checkElem(Op0);

- if (Offset < 0)

+ if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)

return SDValue();

unsigned NumElems = N->getNumOperands();

@@ -19844,6 +20016,44 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {

return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));

}

+// Attempt to merge nested concat_vectors/undefs.

+// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))

+// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)

+static SDValue combineConcatVectorOfConcatVectors(SDNode *N,

+ SelectionDAG &DAG) {

+ EVT VT = N->getValueType(0);

+ // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.

+ EVT SubVT;

+ SDValue FirstConcat;

+ for (const SDValue &Op : N->ops()) {

+ if (Op.isUndef())

+ continue;

+ if (Op.getOpcode() != ISD::CONCAT_VECTORS)

+ return SDValue();

+ if (!FirstConcat) {

+ SubVT = Op.getOperand(0).getValueType();

+ if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))

+ return SDValue();

+ FirstConcat = Op;

+ continue;

+ }

+ if (SubVT != Op.getOperand(0).getValueType())

+ return SDValue();

+ }

+ assert(FirstConcat && "Concat of all-undefs found");

+ SmallVector<SDValue> ConcatOps;

+ for (const SDValue &Op : N->ops()) {

+ if (Op.isUndef()) {

+ ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));

+ continue;

+ }

+ ConcatOps.append(Op->op_begin(), Op->op_end());

+ }

+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);

// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR

// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at

// most two distinct vectors the same size as the result, attempt to turn this

@@ -20103,13 +20313,19 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {

}

// Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.

+ // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).

if (SDValue V = combineConcatVectorOfScalars(N, DAG))

return V;

- // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.

- if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))

+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {

+ // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.

+ if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))

+ return V;

+ // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.

if (SDValue V = combineConcatVectorOfExtracts(N, DAG))

return V;

+ }

if (SDValue V = combineConcatVectorOfCasts(N, DAG))

return V;

@@ -20351,9 +20567,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {

return SDValue();

auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));

- auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));

- if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||

- !ExtIdx)

+ if (!Ld || Ld->getExtensionType() || !Ld->isSimple())

return SDValue();

// Allow targets to opt-out.

@@ -20363,7 +20577,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {

if (!VT.isByteSized())

return SDValue();

- unsigned Index = ExtIdx->getZExtValue();

+ unsigned Index = Extract->getConstantOperandVal(1);

unsigned NumElts = VT.getVectorMinNumElements();

// The definition of EXTRACT_SUBVECTOR states that the index must be a

@@ -20492,7 +20706,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {

// If the concatenated source types match this extract, it's a direct

// simplification:

// extract_subvec (concat V1, V2, ...), i --> Vi

- if (ConcatSrcNumElts == ExtNumElts)

+ if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())

return V.getOperand(ConcatOpIdx);

// If the concatenated source vectors are a multiple length of this extract,

@@ -20500,7 +20714,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {

// concat operand. Example:

// v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->

// v2i8 extract_subvec v8i8 Y, 6

- if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {

+ if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&

+ ConcatSrcNumElts % ExtNumElts == 0) {

SDLoc DL(N);

unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;

assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&

@@ -20562,8 +20777,12 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {

// otherwise => (extract_subvec V1, ExtIdx)

uint64_t InsIdx = V.getConstantOperandVal(2);

if (InsIdx * SmallVT.getScalarSizeInBits() ==

- ExtIdx * NVT.getScalarSizeInBits())

+ ExtIdx * NVT.getScalarSizeInBits()) {

+ if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))

+ return SDValue();

return DAG.getBitcast(NVT, V.getOperand(1));

+ }

return DAG.getNode(

ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,

DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),

@@ -21131,15 +21350,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {

ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

// Canonicalize shuffle v, v -> v, undef

- if (N0 == N1) {

- SmallVector<int, 8> NewMask;

- for (unsigned i = 0; i != NumElts; ++i) {

- int Idx = SVN->getMaskElt(i);

- if (Idx >= (int)NumElts) Idx -= NumElts;

- NewMask.push_back(Idx);

- }

- return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);

- }

+ if (N0 == N1)

+ return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),

+ createUnaryMask(SVN->getMask(), NumElts));

// Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.

if (N0.isUndef())

@@ -21290,6 +21503,70 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {

}

+ // See if we can replace a shuffle with an insert_subvector.

+ // e.g. v2i32 into v8i32:

+ // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).

+ // --> insert_subvector(lhs,rhs1,4).

+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&

+ TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {

+ auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {

+ // Ensure RHS subvectors are legal.

+ assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");

+ EVT SubVT = RHS.getOperand(0).getValueType();

+ int NumSubVecs = RHS.getNumOperands();

+ int NumSubElts = SubVT.getVectorNumElements();

+ assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");

+ if (!TLI.isTypeLegal(SubVT))

+ return SDValue();

+ // Don't bother if we have an unary shuffle (matches undef + LHS elts).

+ if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))

+ return SDValue();

+ // Search [NumSubElts] spans for RHS sequence.

+ // TODO: Can we avoid nested loops to increase performance?

+ SmallVector<int> InsertionMask(NumElts);

+ for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {

+ for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {

+ // Reset mask to identity.

+ std::iota(InsertionMask.begin(), InsertionMask.end(), 0);

+ // Add subvector insertion.

+ std::iota(InsertionMask.begin() + SubIdx,

+ InsertionMask.begin() + SubIdx + NumSubElts,

+ NumElts + (SubVec * NumSubElts));

+ // See if the shuffle mask matches the reference insertion mask.

+ bool MatchingShuffle = true;

+ for (int i = 0; i != (int)NumElts; ++i) {

+ int ExpectIdx = InsertionMask[i];

+ int ActualIdx = Mask[i];

+ if (0 <= ActualIdx && ExpectIdx != ActualIdx) {

+ MatchingShuffle = false;

+ break;

+ }

+ if (MatchingShuffle)

+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,

+ RHS.getOperand(SubVec),

+ DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));

+ }

+ return SDValue();

+ };

+ ArrayRef<int> Mask = SVN->getMask();

+ if (N1.getOpcode() == ISD::CONCAT_VECTORS)

+ if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))

+ return InsertN1;

+ if (N0.getOpcode() == ISD::CONCAT_VECTORS) {

+ SmallVector<int> CommuteMask(Mask.begin(), Mask.end());

+ ShuffleVectorSDNode::commuteMask(CommuteMask);

+ if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))

+ return InsertN0;

+ }

// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -

// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.

if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))

@@ -21859,6 +22136,40 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {

return SDValue();

}

+SDValue DAGCombiner::visitVPOp(SDNode *N) {

+ // VP operations in which all vector elements are disabled - either by

+ // determining that the mask is all false or that the EVL is 0 - can be

+ // eliminated.

+ bool AreAllEltsDisabled = false;

+ if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))

+ AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));

+ if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))

+ AreAllEltsDisabled |=

+ ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());

+ // This is the only generic VP combine we support for now.

+ if (!AreAllEltsDisabled)

+ return SDValue();

+ // Binary operations can be replaced by UNDEF.

+ if (ISD::isVPBinaryOp(N->getOpcode()))

+ return DAG.getUNDEF(N->getValueType(0));

+ // VP Memory operations can be replaced by either the chain (stores) or the

+ // chain + undef (loads).

+ if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {

+ if (MemSD->writeMem())

+ return MemSD->getChain();

+ return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());

+ }

+ // Reduction operations return the start operand when no elements are active.

+ if (ISD::isVPReduction(N->getOpcode()))

+ return N->getOperand(0);

+ return SDValue();

/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle

/// with the destination vector and a zero vector.

/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>

@@ -21915,7 +22226,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {

else

Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);

- if (Bits.isAllOnesValue())

+ if (Bits.isAllOnes())

Indices.push_back(i);

else if (Bits == 0)

Indices.push_back(i + NumSubElts);

@@ -21950,7 +22261,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {

/// If a vector binop is performed on splat values, it may be profitable to

/// extract, scalarize, and insert/splat.

-static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {

+static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,

+ const SDLoc &DL) {

SDValue N0 = N->getOperand(0);

SDValue N1 = N->getOperand(1);

unsigned Opcode = N->getOpcode();

@@ -21971,7 +22283,6 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {

!TLI.isOperationLegalOrCustom(Opcode, EltVT))

return SDValue();

- SDLoc DL(N);

SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);

SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);

SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);

@@ -21995,20 +22306,19 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {

}

/// Visit a binary vector operation, like ADD.

-SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {

- assert(N->getValueType(0).isVector() &&

- "SimplifyVBinOp only works on vectors!");

+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {

+ EVT VT = N->getValueType(0);

+ assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");

SDValue LHS = N->getOperand(0);

SDValue RHS = N->getOperand(1);

SDValue Ops[] = {LHS, RHS};

- EVT VT = N->getValueType(0);

unsigned Opcode = N->getOpcode();

SDNodeFlags Flags = N->getFlags();

// See if we can constant fold the vector operation.

- if (SDValue Fold = DAG.FoldConstantVectorArithmetic(

- Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))

+ if (SDValue Fold = DAG.FoldConstantArithmetic(Opcode, SDLoc(LHS),

+ LHS.getValueType(), Ops))

return Fold;

// Move unary shuffles with identical masks after a vector binop:

@@ -22026,7 +22336,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {

if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&

LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&

(LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {

- SDLoc DL(N);

SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),

RHS.getOperand(0), Flags);

SDValue UndefV = LHS.getOperand(1);

@@ -22043,7 +22352,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {

Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&

Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {

// binop (splat X), (splat C) --> splat (binop X, C)

- SDLoc DL(N);

SDValue X = Shuf0->getOperand(0);

SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);

return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),

@@ -22053,7 +22361,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {

Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&

Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {

// binop (splat C), (splat X) --> splat (binop C, X)

- SDLoc DL(N);

SDValue X = Shuf1->getOperand(0);

SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);

return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),

@@ -22077,7 +22384,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {

TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,

LegalOperations)) {

// (binop undef, undef) may not return undef, so compute that result.

- SDLoc DL(N);

SDValue VecC =

DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));

SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);

@@ -22104,7 +22410,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {

EVT NarrowVT = LHS.getOperand(0).getValueType();

if (NarrowVT == RHS.getOperand(0).getValueType() &&

TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {

- SDLoc DL(N);

unsigned NumOperands = LHS.getNumOperands();

SmallVector<SDValue, 4> ConcatOps;

for (unsigned i = 0; i != NumOperands; ++i) {

@@ -22117,7 +22422,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {

}

- if (SDValue V = scalarizeBinOpOfSplats(N, DAG))

+ if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))

return V;

return SDValue();

@@ -22431,15 +22736,23 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {

if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))

return SDValue();

- if (!N->isOnlyUserOf(N0.getNode()) || !N->isOnlyUserOf(N1.getNode()))

+ // The use checks are intentionally on SDNode because we may be dealing

+ // with opcodes that produce more than one SDValue.

+ // TODO: Do we really need to check N0 (the condition operand of the select)?

+ // But removing that clause could cause an infinite loop...

+ if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())

return SDValue();

+ // Binops may include opcodes that return multiple values, so all values

+ // must be created/propagated from the newly created binops below.

+ SDVTList OpVTs = N1->getVTList();

// Fold select(cond, binop(x, y), binop(z, y))

// --> binop(select(cond, x, z), y)

if (N1.getOperand(1) == N2.getOperand(1)) {

SDValue NewSel =

DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));

- SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, NewSel, N1.getOperand(1));

+ SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));

NewBinOp->setFlags(N1->getFlags());

NewBinOp->intersectFlagsWith(N2->getFlags());

return NewBinOp;

@@ -22453,7 +22766,7 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {

VT == N2.getOperand(1).getValueType()) {

SDValue NewSel =

DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));

- SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, N1.getOperand(0), NewSel);

+ SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);

NewBinOp->setFlags(N1->getFlags());

NewBinOp->intersectFlagsWith(N2->getFlags());

return NewBinOp;

@@ -22581,7 +22894,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,

if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {

// fold select_cc true, x, y -> x

// fold select_cc false, x, y -> y

- return !(SCCC->isNullValue()) ? N2 : N3;

+ return !(SCCC->isZero()) ? N2 : N3;

}

@@ -22680,7 +22993,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,

// select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)

// select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)

// select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)

- if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {

+ if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {

SDValue ValueOnZero = N2;

SDValue Count = N3;

// If the condition is NE instead of E, swap the operands.

@@ -22707,6 +23020,20 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,

}

+ // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C

+ // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C

+ if (!NotExtCompare && N1C && N2C && N3C &&

+ N2C->getAPIntValue() == ~N3C->getAPIntValue() &&

+ ((N1C->isAllOnes() && CC == ISD::SETGT) ||

+ (N1C->isZero() && CC == ISD::SETLT)) &&

+ !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {

+ SDValue ASR = DAG.getNode(

+ ISD::SRA, DL, CmpOpVT, N0,

+ DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));

+ return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),

+ DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));

+ }

return SDValue();

}

@@ -22747,7 +23074,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {

return SDValue();

// Avoid division by zero.

- if (C->isNullValue())

+ if (C->isZero())

return SDValue();

SmallVector<SDNode *, 8> Built;

@@ -22792,7 +23119,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {

/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)

/// For the reciprocal, we need to find the zero of the function:

-/// F(X) = A X - 1 [which has a zero at X = 1/A]

+/// F(X) = 1/X - A [which has a zero at X = 1/A]

/// =>

/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form

/// does not require additional intermediate precision]

@@ -22803,9 +23130,10 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,

if (LegalDAG)

return SDValue();

- // TODO: Handle half and/or extended types?

+ // TODO: Handle extended types?

EVT VT = Op.getValueType();

- if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)

+ if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&

+ VT.getScalarType() != MVT::f64)

return SDValue();

// If estimates are explicitly disabled for this function, we're done.

@@ -22942,9 +23270,10 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,

if (LegalDAG)

return SDValue();

- // TODO: Handle half and/or extended types?

+ // TODO: Handle extended types?

EVT VT = Op.getValueType();

- if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)

+ if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&

+ VT.getScalarType() != MVT::f64)

return SDValue();

// If estimates are explicitly disabled for this function, we're done.

@@ -22994,7 +23323,7 @@ SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {

}

/// Return true if there is any possibility that the two addresses overlap.

-bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {

+bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {

struct MemUseCharacteristics {

bool IsVolatile;

@@ -23154,7 +23483,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,

// TODO: Relax aliasing for unordered atomics (see D66309)

bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&

cast<LSBaseSDNode>(C.getNode())->isSimple();

- if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {

+ if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {

// Look further up the chain.

C = C.getOperand(0);

return true;

@@ -23172,7 +23501,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,

case ISD::LIFETIME_END: {

// We can forward past any lifetime start/end that can be proven not to

// alias the memory access.

- if (!isAlias(N, C.getNode())) {

+ if (!mayAlias(N, C.getNode())) {

// Look further up the chain.

C = C.getOperand(0);

return true;

diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 4ca731cfdf62..4d1449bc2751 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp

@@ -75,6 +75,7 @@

#include "llvm/IR/DebugInfo.h"

#include "llvm/IR/DebugLoc.h"

#include "llvm/IR/DerivedTypes.h"

+#include "llvm/IR/DiagnosticInfo.h"

#include "llvm/IR/Function.h"

#include "llvm/IR/GetElementPtrTypeIterator.h"

#include "llvm/IR/GlobalValue.h"

@@ -195,10 +196,8 @@ void FastISel::flushLocalValueMap() {

EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt)

: FuncInfo.MBB->rend();

MachineBasicBlock::reverse_iterator RI(LastLocalValue);

- for (; RI != RE;) {

- MachineInstr &LocalMI = *RI;

- // Increment before erasing what it points to.

- ++RI;

+ for (MachineInstr &LocalMI :

+ llvm::make_early_inc_range(llvm::make_range(RI, RE))) {

if (!DefReg)

continue;

@@ -622,7 +621,7 @@ bool FastISel::selectGetElementPtr(const User *I) {

bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,

const CallInst *CI, unsigned StartIdx) {

- for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) {

+ for (unsigned i = StartIdx, e = CI->arg_size(); i != e; ++i) {

Value *Val = CI->getArgOperand(i);

// Check for constants and encode them with a StackMaps::ConstantOp prefix.

if (const auto *C = dyn_cast<ConstantInt>(Val)) {

@@ -784,7 +783,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {

// Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>

// This includes all meta-operands up to but not including CC.

unsigned NumMetaOpers = PatchPointOpers::CCPos;

- assert(I->getNumArgOperands() >= NumMetaOpers + NumArgs &&

+ assert(I->arg_size() >= NumMetaOpers + NumArgs &&

"Not enough arguments provided to the patchpoint intrinsic");

// For AnyRegCC the arguments are lowered later on manually.

@@ -1151,6 +1150,8 @@ bool FastISel::lowerCall(const CallInst *CI) {

CLI.setCallee(RetTy, FuncTy, CI->getCalledOperand(), std::move(Args), *CI)

.setTailCall(IsTailCall);

+ diagnoseDontCall(*CI);

return lowerCallTo(CLI);

}

@@ -1264,7 +1265,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {

// If using instruction referencing, mutate this into a DBG_INSTR_REF,

// to be later patched up by finalizeDebugInstrRefs. Tack a deref onto

// the expression, we don't have an "indirect" flag in DBG_INSTR_REF.

- if (TM.Options.ValueTrackingVariableLocations && Op->isReg()) {

+ if (FuncInfo.MF->useDebugInstrRef() && Op->isReg()) {

Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF));

Builder->getOperand(1).ChangeToImmediate(0);

auto *NewExpr =

@@ -1292,18 +1293,22 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {

BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U,

DI->getVariable(), DI->getExpression());

} else if (const auto *CI = dyn_cast<ConstantInt>(V)) {

+ // See if there's an expression to constant-fold.

+ DIExpression *Expr = DI->getExpression();

+ if (Expr)

+ std::tie(Expr, CI) = Expr->constantFold(CI);

if (CI->getBitWidth() > 64)

BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)

.addCImm(CI)

.addImm(0U)

.addMetadata(DI->getVariable())

- .addMetadata(DI->getExpression());

+ .addMetadata(Expr);

else

BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)

.addImm(CI->getZExtValue())

.addImm(0U)

.addMetadata(DI->getVariable())

- .addMetadata(DI->getExpression());

+ .addMetadata(Expr);

} else if (const auto *CF = dyn_cast<ConstantFP>(V)) {

BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)

.addFPImm(CF)

@@ -1319,7 +1324,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {

// If using instruction referencing, mutate this into a DBG_INSTR_REF,

// to be later patched up by finalizeDebugInstrRefs.

- if (TM.Options.ValueTrackingVariableLocations) {

+ if (FuncInfo.MF->useDebugInstrRef()) {

Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF));

Builder->getOperand(1).ChangeToImmediate(0);

}

@@ -2303,8 +2308,7 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {

bool IsDereferenceable = I->hasMetadata(LLVMContext::MD_dereferenceable);

const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);

- AAMDNodes AAInfo;

- I->getAAMetadata(AAInfo);

+ AAMDNodes AAInfo = I->getAAMetadata();

if (!Alignment) // Ensure that codegen never sees alignment 0.

Alignment = DL.getABITypeAlign(ValTy);

diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 348fad6daf8f..c1bb65409282 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp

@@ -722,7 +722,7 @@ void InstrEmitter::AddDbgValueLocationOps(

MIB.addFrameIndex(Op.getFrameIx());

break;

case SDDbgOperand::VREG:

- MIB.addReg(Op.getVReg(), RegState::Debug);

+ MIB.addReg(Op.getVReg());

break;

case SDDbgOperand::SDNODE: {

SDValue V = SDValue(Op.getSDNode(), Op.getResNo());

@@ -862,7 +862,7 @@ MachineInstr *InstrEmitter::EmitDbgNoLocation(SDDbgValue *SD) {

DebugLoc DL = SD->getDebugLoc();

auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE));

MIB.addReg(0U);

- MIB.addReg(0U, RegState::Debug);

+ MIB.addReg(0U);

MIB.addMetadata(Var);

MIB.addMetadata(Expr);

return &*MIB;

@@ -872,22 +872,33 @@ MachineInstr *

InstrEmitter::EmitDbgValueFromSingleOp(SDDbgValue *SD,

DenseMap<SDValue, Register> &VRBaseMap) {

MDNode *Var = SD->getVariable();

- MDNode *Expr = SD->getExpression();

+ DIExpression *Expr = SD->getExpression();

DebugLoc DL = SD->getDebugLoc();

const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);

assert(SD->getLocationOps().size() == 1 &&

"Non variadic dbg_value should have only one location op");

+ // See about constant-folding the expression.

+ // Copy the location operand in case we replace it.

+ SmallVector<SDDbgOperand, 1> LocationOps(1, SD->getLocationOps()[0]);

+ if (Expr && LocationOps[0].getKind() == SDDbgOperand::CONST) {

+ const Value *V = LocationOps[0].getConst();

+ if (auto *C = dyn_cast<ConstantInt>(V)) {

+ std::tie(Expr, C) = Expr->constantFold(C);

+ LocationOps[0] = SDDbgOperand::fromConst(C);

+ }

// Emit non-variadic dbg_value nodes as DBG_VALUE.

// DBG_VALUE := "DBG_VALUE" loc, isIndirect, var, expr

auto MIB = BuildMI(*MF, DL, II);

- AddDbgValueLocationOps(MIB, II, SD->getLocationOps(), VRBaseMap);

+ AddDbgValueLocationOps(MIB, II, LocationOps, VRBaseMap);

if (SD->isIndirect())

MIB.addImm(0U);

else

- MIB.addReg(0U, RegState::Debug);

+ MIB.addReg(0U);

return MIB.addMetadata(Var).addMetadata(Expr);

}

@@ -1329,5 +1340,5 @@ InstrEmitter::InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb,

TRI(MF->getSubtarget().getRegisterInfo()),

TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb),

InsertPos(insertpos) {

- EmitDebugInstrRefs = TM.Options.ValueTrackingVariableLocations;

+ EmitDebugInstrRefs = MF->useDebugInstrRef();

}

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index d92b23f56e4d..eb9d2286aeb4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

@@ -1164,6 +1164,16 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {

Action = TLI.getOperationAction(Node->getOpcode(),

cast<MaskedStoreSDNode>(Node)->getValue().getValueType());

break;

+ case ISD::VP_SCATTER:

+ Action = TLI.getOperationAction(

+ Node->getOpcode(),

+ cast<VPScatterSDNode>(Node)->getValue().getValueType());

+ break;

+ case ISD::VP_STORE:

+ Action = TLI.getOperationAction(

+ Node->getOpcode(),

+ cast<VPStoreSDNode>(Node)->getValue().getValueType());

+ break;

case ISD::VECREDUCE_FADD:

case ISD::VECREDUCE_FMUL:

case ISD::VECREDUCE_ADD:

@@ -1181,6 +1191,22 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {

Node->getOpcode(), Node->getOperand(0).getValueType());

break;

case ISD::VECREDUCE_SEQ_FADD:

+ case ISD::VECREDUCE_SEQ_FMUL:

+ case ISD::VP_REDUCE_FADD:

+ case ISD::VP_REDUCE_FMUL:

+ case ISD::VP_REDUCE_ADD:

+ case ISD::VP_REDUCE_MUL:

+ case ISD::VP_REDUCE_AND:

+ case ISD::VP_REDUCE_OR:

+ case ISD::VP_REDUCE_XOR:

+ case ISD::VP_REDUCE_SMAX:

+ case ISD::VP_REDUCE_SMIN:

+ case ISD::VP_REDUCE_UMAX:

+ case ISD::VP_REDUCE_UMIN:

+ case ISD::VP_REDUCE_FMAX:

+ case ISD::VP_REDUCE_FMIN:

+ case ISD::VP_REDUCE_SEQ_FADD:

+ case ISD::VP_REDUCE_SEQ_FMUL:

Action = TLI.getOperationAction(

Node->getOpcode(), Node->getOperand(1).getValueType());

break;

@@ -1333,9 +1359,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {

Visited.insert(Op.getNode());

Worklist.push_back(Idx.getNode());

SDValue StackPtr, Ch;

- for (SDNode::use_iterator UI = Vec.getNode()->use_begin(),

- UE = Vec.getNode()->use_end(); UI != UE; ++UI) {

- SDNode *User = *UI;

+ for (SDNode *User : Vec.getNode()->uses()) {

if (StoreSDNode *ST = dyn_cast<StoreSDNode>(User)) {

if (ST->isIndexed() || ST->isTruncatingStore() ||

ST->getValue() != Vec)

@@ -2197,9 +2221,7 @@ static bool useSinCos(SDNode *Node) {

? ISD::FCOS : ISD::FSIN;

SDValue Op0 = Node->getOperand(0);

- for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),

- UE = Op0.getNode()->use_end(); UI != UE; ++UI) {

- SDNode *User = *UI;

+ for (const SDNode *User : Op0.getNode()->uses()) {

if (User == Node)

continue;

// The other user might have been turned into sincos already.

@@ -2636,7 +2658,7 @@ SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) {

// If CTPOP is legal, use it. Otherwise use shifts and xor.

SDValue Result;

- if (TLI.isOperationLegal(ISD::CTPOP, VT)) {

+ if (TLI.isOperationLegalOrPromote(ISD::CTPOP, VT)) {

Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);

} else {

Result = Op;

@@ -2658,21 +2680,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {

bool NeedInvert;

switch (Node->getOpcode()) {

case ISD::ABS:

- if (TLI.expandABS(Node, Tmp1, DAG))

+ if ((Tmp1 = TLI.expandABS(Node, DAG)))

Results.push_back(Tmp1);

break;

case ISD::CTPOP:

- if (TLI.expandCTPOP(Node, Tmp1, DAG))

+ if ((Tmp1 = TLI.expandCTPOP(Node, DAG)))

Results.push_back(Tmp1);

break;

case ISD::CTLZ:

case ISD::CTLZ_ZERO_UNDEF:

- if (TLI.expandCTLZ(Node, Tmp1, DAG))

+ if ((Tmp1 = TLI.expandCTLZ(Node, DAG)))

Results.push_back(Tmp1);

break;

case ISD::CTTZ:

case ISD::CTTZ_ZERO_UNDEF:

- if (TLI.expandCTTZ(Node, Tmp1, DAG))

+ if ((Tmp1 = TLI.expandCTTZ(Node, DAG)))

Results.push_back(Tmp1);

break;

case ISD::BITREVERSE:

@@ -3229,9 +3251,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {

assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&

TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&

"Don't know how to expand this subtraction!");

- Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),

- DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl,

- VT));

+ Tmp1 = DAG.getNOT(dl, Node->getOperand(1), VT);

Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT));

Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));

break;

@@ -4242,8 +4262,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {

SDValue Op = Node->getOperand(IsStrict ? 1 : 0);

SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();

EVT VT = Node->getValueType(0);

- assert(cast<ConstantSDNode>(Node->getOperand(IsStrict ? 2 : 1))

- ->isNullValue() &&

+ assert(cast<ConstantSDNode>(Node->getOperand(IsStrict ? 2 : 1))->isZero() &&

"Unable to expand as libcall if it is not normal rounding");

RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), VT);

@@ -4737,6 +4756,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {

break;

case ISD::STRICT_FFLOOR:

case ISD::STRICT_FCEIL:

+ case ISD::STRICT_FROUND:

case ISD::STRICT_FSIN:

case ISD::STRICT_FCOS:

case ISD::STRICT_FLOG:

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 3553f9ec16c2..27f9cede1922 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp

@@ -61,6 +61,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {

#endif

llvm_unreachable("Do not know how to soften the result of this operator!");

+ case ISD::ARITH_FENCE: R = SoftenFloatRes_ARITH_FENCE(N); break;

case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;

case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;

case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;

@@ -206,6 +207,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREEZE(SDNode *N) {

GetSoftenedFloat(N->getOperand(0)));

}

+SDValue DAGTypeLegalizer::SoftenFloatRes_ARITH_FENCE(SDNode *N) {

+ EVT Ty = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));

+ SDValue NewFence = DAG.getNode(ISD::ARITH_FENCE, SDLoc(N), Ty,

+ GetSoftenedFloat(N->getOperand(0)));

+ return NewFence;

SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N,

unsigned ResNo) {

SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);

@@ -257,7 +265,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {

unsigned Size = NVT.getSizeInBits();

// Mask = ~(1 << (Size-1))

- APInt API = APInt::getAllOnesValue(Size);

+ APInt API = APInt::getAllOnes(Size);

API.clearBit(Size - 1);

SDValue Mask = DAG.getConstant(API, SDLoc(N), NVT);

SDValue Op = GetSoftenedFloat(N->getOperand(0));

@@ -820,6 +828,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {

case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;

case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;

+ case ISD::STRICT_FP_TO_FP16:

case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes

case ISD::STRICT_FP_ROUND:

case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;

@@ -871,13 +880,17 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {

// We actually deal with the partially-softened FP_TO_FP16 node too, which

// returns an i16 so doesn't meet the constraints necessary for FP_ROUND.

assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 ||

+ N->getOpcode() == ISD::STRICT_FP_TO_FP16 ||

N->getOpcode() == ISD::STRICT_FP_ROUND);

bool IsStrict = N->isStrictFPOpcode();

SDValue Op = N->getOperand(IsStrict ? 1 : 0);

EVT SVT = Op.getValueType();

EVT RVT = N->getValueType(0);

- EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT;

+ EVT FloatRVT = (N->getOpcode() == ISD::FP_TO_FP16 ||

+ N->getOpcode() == ISD::STRICT_FP_TO_FP16)

+ ? MVT::f16

+ : RVT;

RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);

assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index b8a3dd014901..1fa4d88fcb4a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

@@ -23,6 +23,7 @@

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Support/KnownBits.h"

#include "llvm/Support/raw_ostream.h"

+#include <algorithm>

using namespace llvm;

#define DEBUG_TYPE "legalize-types"

@@ -81,15 +82,23 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {

case ISD::STRICT_FSETCCS:

case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;

case ISD::SMIN:

- case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break;

+ case ISD::SMAX:

+ Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ false);

+ break;

case ISD::UMIN:

case ISD::UMAX: Res = PromoteIntRes_UMINUMAX(N); break;

- case ISD::SHL: Res = PromoteIntRes_SHL(N); break;

+ case ISD::SHL:

+ Res = PromoteIntRes_SHL(N, /*IsVP*/ false);

+ break;

case ISD::SIGN_EXTEND_INREG:

Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;

- case ISD::SRA: Res = PromoteIntRes_SRA(N); break;

- case ISD::SRL: Res = PromoteIntRes_SRL(N); break;

+ case ISD::SRA:

+ Res = PromoteIntRes_SRA(N, /*IsVP*/ false);

+ break;

+ case ISD::SRL:

+ Res = PromoteIntRes_SRL(N, /*IsVP*/ false);

+ break;

case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;

case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;

case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;

@@ -144,13 +153,19 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {

case ISD::XOR:

case ISD::ADD:

case ISD::SUB:

- case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;

+ case ISD::MUL:

+ Res = PromoteIntRes_SimpleIntBinOp(N, /*IsVP*/ false);

+ break;

case ISD::SDIV:

- case ISD::SREM: Res = PromoteIntRes_SExtIntBinOp(N); break;

+ case ISD::SREM:

+ Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ false);

+ break;

case ISD::UDIV:

- case ISD::UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break;

+ case ISD::UREM:

+ Res = PromoteIntRes_ZExtIntBinOp(N, /*IsVP*/ false);

+ break;

case ISD::SADDO:

case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break;

@@ -220,6 +235,18 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {

Res = PromoteIntRes_VECREDUCE(N);

break;

+ case ISD::VP_REDUCE_ADD:

+ case ISD::VP_REDUCE_MUL:

+ case ISD::VP_REDUCE_AND:

+ case ISD::VP_REDUCE_OR:

+ case ISD::VP_REDUCE_XOR:

+ case ISD::VP_REDUCE_SMAX:

+ case ISD::VP_REDUCE_SMIN:

+ case ISD::VP_REDUCE_UMAX:

+ case ISD::VP_REDUCE_UMIN:

+ Res = PromoteIntRes_VP_REDUCE(N);

+ break;

case ISD::FREEZE:

Res = PromoteIntRes_FREEZE(N);

break;

@@ -233,6 +260,32 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {

case ISD::FSHR:

Res = PromoteIntRes_FunnelShift(N);

break;

+ case ISD::VP_AND:

+ case ISD::VP_OR:

+ case ISD::VP_XOR:

+ case ISD::VP_ADD:

+ case ISD::VP_SUB:

+ case ISD::VP_MUL:

+ Res = PromoteIntRes_SimpleIntBinOp(N, /*IsVP*/ true);

+ break;

+ case ISD::VP_SDIV:

+ case ISD::VP_SREM:

+ Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ true);

+ break;

+ case ISD::VP_UDIV:

+ case ISD::VP_UREM:

+ Res = PromoteIntRes_ZExtIntBinOp(N, /*IsVP*/ true);

+ break;

+ case ISD::VP_SHL:

+ Res = PromoteIntRes_SHL(N, /*IsVP*/ true);

+ break;

+ case ISD::VP_ASHR:

+ Res = PromoteIntRes_SRA(N, /*IsVP*/ true);

+ break;

+ case ISD::VP_LSHR:

+ Res = PromoteIntRes_SRL(N, /*IsVP*/ true);

+ break;

}

// If the result is null then the sub-method took care of registering it.

@@ -438,19 +491,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {

CreateStackStoreLoad(InOp, OutVT));

}

-// Helper for BSWAP/BITREVERSE promotion to ensure we can fit any shift amount

-// in the VT returned by getShiftAmountTy and to return a safe VT if we can't.

-static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI,

- SelectionDAG &DAG) {

- EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());

- // If any possible shift value won't fit in the prefered type, just use

- // something safe. It will be legalized when the shift is expanded.

- if (!ShiftVT.isVector() &&

- ShiftVT.getSizeInBits() < Log2_32_Ceil(VT.getSizeInBits()))

- ShiftVT = MVT::i32;

- return ShiftVT;

SDValue DAGTypeLegalizer::PromoteIntRes_FREEZE(SDNode *N) {

SDValue V = GetPromotedInteger(N->getOperand(0));

return DAG.getNode(ISD::FREEZE, SDLoc(N),

@@ -474,7 +514,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {

}

unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();

- EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);

+ EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());

return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),

DAG.getConstant(DiffBits, dl, ShiftVT));

}

@@ -496,7 +536,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {

}

unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();

- EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);

+ EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());

return DAG.getNode(ISD::SRL, dl, NVT,

DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),

DAG.getConstant(DiffBits, dl, ShiftVT));

@@ -526,11 +566,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {

}

SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {

+ EVT OVT = N->getValueType(0);

+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);

+ SDLoc dl(N);

+ // If the larger CTLZ isn't supported by the target, try to expand now.

+ // If we expand later we'll end up with more operations since we lost the

+ // original type.

+ if (!OVT.isVector() && TLI.isTypeLegal(NVT) &&

+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTLZ, NVT) &&

+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTLZ_ZERO_UNDEF, NVT)) {

+ if (SDValue Result = TLI.expandCTLZ(N, DAG)) {

+ Result = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Result);

+ return Result;

+ }

// Zero extend to the promoted type and do the count there.

SDValue Op = ZExtPromotedInteger(N->getOperand(0));

- SDLoc dl(N);

- EVT OVT = N->getValueType(0);

- EVT NVT = Op.getValueType();

Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);

// Subtract off the extra leading bits in the bigger type.

return DAG.getNode(

@@ -540,6 +593,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {

}

SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) {

+ EVT OVT = N->getValueType(0);

+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);

+ // If the larger CTPOP isn't supported by the target, try to expand now.

+ // If we expand later we'll end up with more operations since we lost the

+ // original type.

+ // TODO: Expand ISD::PARITY. Need to move ExpandPARITY from LegalizeDAG to

+ // TargetLowering.

+ if (N->getOpcode() == ISD::CTPOP && !OVT.isVector() && TLI.isTypeLegal(NVT) &&

+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTPOP, NVT)) {

+ if (SDValue Result = TLI.expandCTPOP(N, DAG)) {

+ Result = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Result);

+ return Result;

+ }

// Zero extend to the promoted type and do the count or parity there.

SDValue Op = ZExtPromotedInteger(N->getOperand(0));

return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op);

@@ -550,6 +619,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {

EVT OVT = N->getValueType(0);

EVT NVT = Op.getValueType();

SDLoc dl(N);

+ // If the larger CTTZ isn't supported by the target, try to expand now.

+ // If we expand later we'll end up with more operations since we lost the

+ // original type. Don't expand if we can use CTPOP or CTLZ expansion on the

+ // larger type.

+ if (!OVT.isVector() && TLI.isTypeLegal(NVT) &&

+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTTZ, NVT) &&

+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTTZ_ZERO_UNDEF, NVT) &&

+ !TLI.isOperationLegal(ISD::CTPOP, NVT) &&

+ !TLI.isOperationLegal(ISD::CTLZ, NVT)) {

+ if (SDValue Result = TLI.expandCTTZ(N, DAG)) {

+ Result = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Result);

+ return Result;

+ }

if (N->getOpcode() == ISD::CTTZ) {

// The count is the same in the promoted type except if the original

// value was zero. This can be handled by setting the bit just off

@@ -702,11 +787,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {

EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));

SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());

+ ISD::LoadExtType ExtType = N->getExtensionType();

+ if (ExtType == ISD::NON_EXTLOAD)

+ ExtType = ISD::EXTLOAD;

SDLoc dl(N);

SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),

N->getOffset(), N->getMask(), ExtPassThru,

N->getMemoryVT(), N->getMemOperand(),

- N->getAddressingMode(), ISD::EXTLOAD);

+ N->getAddressingMode(), ExtType,

+ N->isExpandingLoad());

// Legalize the chain result - switch anything that used the old chain to

// use the new one.

ReplaceValueWith(SDValue(N, 1), Res.getValue(1));

@@ -792,7 +882,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {

unsigned NewBits = PromotedType.getScalarSizeInBits();

if (Opcode == ISD::UADDSAT) {

- APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits);

+ APInt MaxVal = APInt::getAllOnes(OldBits).zext(NewBits);

SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);

SDValue Add =

DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);

@@ -806,7 +896,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {

// Shift cannot use a min/max expansion, we can't detect overflow if all of

// the bits have been shifted out.

- if (IsShift || TLI.isOperationLegalOrCustom(Opcode, PromotedType)) {

+ if (IsShift || TLI.isOperationLegal(Opcode, PromotedType)) {

unsigned ShiftOp;

switch (Opcode) {

case ISD::SADDSAT:

@@ -1103,12 +1193,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {

return DAG.getSExtOrTrunc(SetCC, dl, NVT);

}

-SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {

+SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N, bool IsVP) {

SDValue LHS = GetPromotedInteger(N->getOperand(0));

SDValue RHS = N->getOperand(1);

if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)

RHS = ZExtPromotedInteger(RHS);

- return DAG.getNode(ISD::SHL, SDLoc(N), LHS.getValueType(), LHS, RHS);

+ if (!IsVP)

+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);

+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,

+ N->getOperand(2), N->getOperand(3));

}

SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {

@@ -1117,30 +1210,36 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {

Op.getValueType(), Op, N->getOperand(1));

}

-SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {

+SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N, bool IsVP) {

// The input may have strange things in the top bits of the registers, but

// these operations don't care. They may have weird bits going out, but

// that too is okay if they are integer operations.

SDValue LHS = GetPromotedInteger(N->getOperand(0));

SDValue RHS = GetPromotedInteger(N->getOperand(1));

- return DAG.getNode(N->getOpcode(), SDLoc(N),

- LHS.getValueType(), LHS, RHS);

+ if (!IsVP)

+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);

+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,

+ N->getOperand(2), N->getOperand(3));

}

-SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) {

+SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N, bool IsVP) {

// Sign extend the input.

SDValue LHS = SExtPromotedInteger(N->getOperand(0));

SDValue RHS = SExtPromotedInteger(N->getOperand(1));

- return DAG.getNode(N->getOpcode(), SDLoc(N),

- LHS.getValueType(), LHS, RHS);

+ if (!IsVP)

+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);

+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,

+ N->getOperand(2), N->getOperand(3));

}

-SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) {

+SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N, bool IsVP) {

// Zero extend the input.

SDValue LHS = ZExtPromotedInteger(N->getOperand(0));

SDValue RHS = ZExtPromotedInteger(N->getOperand(1));

- return DAG.getNode(N->getOpcode(), SDLoc(N),

- LHS.getValueType(), LHS, RHS);

+ if (!IsVP)

+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);

+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,

+ N->getOperand(2), N->getOperand(3));

}

SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) {

@@ -1152,22 +1251,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) {

LHS.getValueType(), LHS, RHS);

}

-SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {

+SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N, bool IsVP) {

// The input value must be properly sign extended.

SDValue LHS = SExtPromotedInteger(N->getOperand(0));

SDValue RHS = N->getOperand(1);

if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)

RHS = ZExtPromotedInteger(RHS);

- return DAG.getNode(ISD::SRA, SDLoc(N), LHS.getValueType(), LHS, RHS);

+ if (!IsVP)

+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);

+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,

+ N->getOperand(2), N->getOperand(3));

}

-SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {

+SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N, bool IsVP) {

// The input value must be properly zero extended.

SDValue LHS = ZExtPromotedInteger(N->getOperand(0));

SDValue RHS = N->getOperand(1);

if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)

RHS = ZExtPromotedInteger(RHS);

- return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS);

+ if (!IsVP)

+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);

+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,

+ N->getOperand(2), N->getOperand(3));

}

SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {

@@ -1383,7 +1488,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {

if (N->getOpcode() == ISD::UMULO) {

// Unsigned overflow occurred if the high part is non-zero.

unsigned Shift = SmallVT.getScalarSizeInBits();

- EVT ShiftTy = getShiftAmountTyForConstant(Mul.getValueType(), TLI, DAG);

+ EVT ShiftTy = TLI.getShiftAmountTy(Mul.getValueType(), DAG.getDataLayout());

SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,

DAG.getConstant(Shift, DL, ShiftTy));

Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,

@@ -1523,6 +1628,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {

case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;

case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;

case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break;

+ case ISD::INSERT_SUBVECTOR: Res = PromoteIntOp_INSERT_SUBVECTOR(N); break;

case ISD::SHL:

case ISD::SRA:

@@ -1560,6 +1666,17 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {

case ISD::VECREDUCE_SMIN:

case ISD::VECREDUCE_UMAX:

case ISD::VECREDUCE_UMIN: Res = PromoteIntOp_VECREDUCE(N); break;

+ case ISD::VP_REDUCE_ADD:

+ case ISD::VP_REDUCE_MUL:

+ case ISD::VP_REDUCE_AND:

+ case ISD::VP_REDUCE_OR:

+ case ISD::VP_REDUCE_XOR:

+ case ISD::VP_REDUCE_SMAX:

+ case ISD::VP_REDUCE_SMIN:

+ case ISD::VP_REDUCE_UMAX:

+ case ISD::VP_REDUCE_UMIN:

+ Res = PromoteIntOp_VP_REDUCE(N, OpNo);

+ break;

case ISD::SET_ROUNDING: Res = PromoteIntOp_SET_ROUNDING(N); break;

}

@@ -1605,10 +1722,8 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,

// If the width of OpL/OpR excluding the duplicated sign bits is no greater

// than the width of NewLHS/NewRH, we can avoid inserting real truncate

// instruction, which is redundant eventually.

- unsigned OpLEffectiveBits =

- OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1;

- unsigned OpREffectiveBits =

- OpR.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1;

+ unsigned OpLEffectiveBits = DAG.ComputeMinSignedBits(OpL);

+ unsigned OpREffectiveBits = DAG.ComputeMinSignedBits(OpR);

if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() &&

OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) {

NewLHS = OpL;

@@ -1832,29 +1947,25 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){

SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,

unsigned OpNo) {

SDValue DataOp = N->getValue();

- EVT DataVT = DataOp.getValueType();

SDValue Mask = N->getMask();

- SDLoc dl(N);

- bool TruncateStore = false;

if (OpNo == 4) {

+ // The Mask. Update in place.

+ EVT DataVT = DataOp.getValueType();

Mask = PromoteTargetBoolean(Mask, DataVT);

- // Update in place.

SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());

NewOps[4] = Mask;

return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);

- } else { // Data operand

- assert(OpNo == 1 && "Unexpected operand for promotion");

- DataOp = GetPromotedInteger(DataOp);

- TruncateStore = true;

}

- return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(),

+ assert(OpNo == 1 && "Unexpected operand for promotion");

+ DataOp = GetPromotedInteger(DataOp);

+ return DAG.getMaskedStore(N->getChain(), SDLoc(N), DataOp, N->getBasePtr(),

N->getOffset(), Mask, N->getMemoryVT(),

N->getMemOperand(), N->getAddressingMode(),

- TruncateStore, N->isCompressingStore());

+ /*IsTruncating*/ true, N->isCompressingStore());

}

SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,

@@ -2023,30 +2134,54 @@ SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) {

return SDValue();

}

-SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {

- SDLoc dl(N);

- SDValue Op;

+static unsigned getExtendForIntVecReduction(SDNode *N) {

switch (N->getOpcode()) {

- default: llvm_unreachable("Expected integer vector reduction");

+ default:

+ llvm_unreachable("Expected integer vector reduction");

case ISD::VECREDUCE_ADD:

case ISD::VECREDUCE_MUL:

case ISD::VECREDUCE_AND:

case ISD::VECREDUCE_OR:

case ISD::VECREDUCE_XOR:

- Op = GetPromotedInteger(N->getOperand(0));

- break;

+ case ISD::VP_REDUCE_ADD:

+ case ISD::VP_REDUCE_MUL:

+ case ISD::VP_REDUCE_AND:

+ case ISD::VP_REDUCE_OR:

+ case ISD::VP_REDUCE_XOR:

+ return ISD::ANY_EXTEND;

case ISD::VECREDUCE_SMAX:

case ISD::VECREDUCE_SMIN:

- Op = SExtPromotedInteger(N->getOperand(0));

- break;

+ case ISD::VP_REDUCE_SMAX:

+ case ISD::VP_REDUCE_SMIN:

+ return ISD::SIGN_EXTEND;

case ISD::VECREDUCE_UMAX:

case ISD::VECREDUCE_UMIN:

- Op = ZExtPromotedInteger(N->getOperand(0));

- break;

+ case ISD::VP_REDUCE_UMAX:

+ case ISD::VP_REDUCE_UMIN:

+ return ISD::ZERO_EXTEND;

}

+SDValue DAGTypeLegalizer::PromoteIntOpVectorReduction(SDNode *N, SDValue V) {

+ switch (getExtendForIntVecReduction(N)) {

+ default:

+ llvm_unreachable("Impossible extension kind for integer reduction");

+ case ISD::ANY_EXTEND:

+ return GetPromotedInteger(V);

+ case ISD::SIGN_EXTEND:

+ return SExtPromotedInteger(V);

+ case ISD::ZERO_EXTEND:

+ return ZExtPromotedInteger(V);

+ }

+SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {

+ SDLoc dl(N);

+ SDValue Op = PromoteIntOpVectorReduction(N, N->getOperand(0));

EVT EltVT = Op.getValueType().getVectorElementType();

EVT VT = N->getValueType(0);

if (VT.bitsGE(EltVT))

return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op);

@@ -2056,6 +2191,38 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {

return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce);

}

+SDValue DAGTypeLegalizer::PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo) {

+ SDLoc DL(N);

+ SDValue Op = N->getOperand(OpNo);

+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());

+ if (OpNo == 2) { // Mask

+ // Update in place.

+ NewOps[2] = PromoteTargetBoolean(Op, N->getOperand(1).getValueType());

+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);

+ }

+ assert(OpNo == 1 && "Unexpected operand for promotion");

+ Op = PromoteIntOpVectorReduction(N, Op);

+ NewOps[OpNo] = Op;

+ EVT VT = N->getValueType(0);

+ EVT EltVT = Op.getValueType().getScalarType();

+ if (VT.bitsGE(EltVT))

+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, NewOps);

+ // Result size must be >= element/start-value size. If this is not the case

+ // after promotion, also promote both the start value and result type and

+ // then truncate.

+ NewOps[0] =

+ DAG.getNode(getExtendForIntVecReduction(N), DL, EltVT, N->getOperand(0));

+ SDValue Reduce = DAG.getNode(N->getOpcode(), DL, EltVT, NewOps);

+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Reduce);

SDValue DAGTypeLegalizer::PromoteIntOp_SET_ROUNDING(SDNode *N) {

SDValue Op = ZExtPromotedInteger(N->getOperand(1));

return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);

@@ -2088,6 +2255,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {

report_fatal_error("Do not know how to expand the result of this "

"operator!");

+ case ISD::ARITH_FENCE: SplitRes_ARITH_FENCE(N, Lo, Hi); break;

case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;

case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;

case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;

@@ -2978,7 +3146,7 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {

bool HasAddCarry = TLI.isOperationLegalOrCustom(

ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));

if (HasAddCarry) {

- EVT ShiftAmtTy = getShiftAmountTyForConstant(NVT, TLI, DAG);

+ EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());

SDValue Sign =

DAG.getNode(ISD::SRA, dl, NVT, Hi,

DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy));

@@ -3087,6 +3255,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,

EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());

Op = GetSoftPromotedHalf(Op);

Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);

+ Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);

+ SplitInteger(Op, Lo, Hi);

+ return;

}

RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);

@@ -3116,6 +3287,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,

EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());

Op = GetSoftPromotedHalf(Op);

Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);

+ Op = DAG.getNode(ISD::FP_TO_UINT, dl, VT, Op);

+ SplitInteger(Op, Lo, Hi);

+ return;

}

RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);

@@ -3367,11 +3541,6 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,

SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);

EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());

- if (APInt::getMaxValue(ShiftAmtTy.getSizeInBits()).ult(HalfBits)) {

- // The type from TLI is too small to fit the shift amount we want.

- // Override it with i32. The shift will have to be legalized.

- ShiftAmtTy = MVT::i32;

- }

SDValue Shift = DAG.getConstant(HalfBits, dl, ShiftAmtTy);

SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);

SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);

@@ -3464,8 +3633,11 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,

SDValue SatMin = DAG.getConstant(MinVal, dl, VT);

SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);

SDValue Zero = DAG.getConstant(0, dl, VT);

- SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);

- Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);

+ // Xor the inputs, if resulting sign bit is 0 the product will be

+ // positive, else negative.

+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);

+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);

+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);

Result = DAG.getSelect(dl, VT, Overflow, Result, Product);

} else {

// For unsigned multiplication, we only need to check the max since we

@@ -3638,7 +3810,7 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,

// Saturate to signed maximum.

APInt MaxHi = APInt::getSignedMaxValue(NVTSize);

- APInt MaxLo = APInt::getAllOnesValue(NVTSize);

+ APInt MaxLo = APInt::getAllOnes(NVTSize);

Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxHi, dl, NVT), Hi);

Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxLo, dl, NVT), Lo);

// Saturate to signed minimum.

@@ -3808,9 +3980,6 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,

// the new SHL_PARTS operation would need further legalization.

SDValue ShiftOp = N->getOperand(1);

EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());

- assert(ShiftTy.getScalarSizeInBits() >=

- Log2_32_Ceil(VT.getScalarSizeInBits()) &&

- "ShiftAmountTy is too small to cover the range of this type!");

if (ShiftOp.getValueType() != ShiftTy)

ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy);

@@ -3857,7 +4026,10 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,

}

if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {

- SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };

+ EVT ShAmtTy =

+ EVT::getIntegerVT(*DAG.getContext(), DAG.getLibInfo().getIntSize());

+ SDValue ShAmt = DAG.getZExtOrTrunc(N->getOperand(1), dl, ShAmtTy);

+ SDValue Ops[2] = {N->getOperand(0), ShAmt};

TargetLowering::MakeLibCallOptions CallOptions;

CallOptions.setSExt(isSigned);

SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);

@@ -4035,7 +4207,25 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,

LC = RTLIB::MULO_I64;

else if (VT == MVT::i128)

LC = RTLIB::MULO_I128;

- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!");

+ if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {

+ // FIXME: This is not an optimal expansion, but better than crashing.

+ EVT WideVT =

+ EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);

+ SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(0));

+ SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(1));

+ SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);

+ SDValue MulLo, MulHi;

+ SplitInteger(Mul, MulLo, MulHi);

+ SDValue SRA =

+ DAG.getNode(ISD::SRA, dl, VT, MulLo,

+ DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, VT));

+ SDValue Overflow =

+ DAG.getSetCC(dl, N->getValueType(1), MulHi, SRA, ISD::SETNE);

+ SplitInteger(MulLo, Lo, Hi);

+ ReplaceValueWith(SDValue(N, 1), Overflow);

+ return;

+ }

SDValue Temp = DAG.CreateStackTemporary(PtrVT);

// Temporary for the overflow value, default it to zero.

@@ -4188,18 +4378,45 @@ void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N,

void DAGTypeLegalizer::ExpandIntRes_Rotate(SDNode *N,

SDValue &Lo, SDValue &Hi) {

- // Lower the rotate to shifts and ORs which can be expanded.

- SDValue Res;

- TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG);

+ // Delegate to funnel-shift expansion.

+ SDLoc DL(N);

+ unsigned Opcode = N->getOpcode() == ISD::ROTL ? ISD::FSHL : ISD::FSHR;

+ SDValue Res = DAG.getNode(Opcode, DL, N->getValueType(0), N->getOperand(0),

+ N->getOperand(0), N->getOperand(1));

SplitInteger(Res, Lo, Hi);

}

-void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N,

- SDValue &Lo, SDValue &Hi) {

- // Lower the funnel shift to shifts and ORs which can be expanded.

- SDValue Res;

- TLI.expandFunnelShift(N, Res, DAG);

- SplitInteger(Res, Lo, Hi);

+void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, SDValue &Lo,

+ SDValue &Hi) {

+ // Values numbered from least significant to most significant.

+ SDValue In1, In2, In3, In4;

+ GetExpandedInteger(N->getOperand(0), In3, In4);

+ GetExpandedInteger(N->getOperand(1), In1, In2);

+ EVT HalfVT = In1.getValueType();

+ SDLoc DL(N);

+ unsigned Opc = N->getOpcode();

+ SDValue ShAmt = N->getOperand(2);

+ EVT ShAmtVT = ShAmt.getValueType();

+ EVT ShAmtCCVT = getSetCCResultType(ShAmtVT);

+ // If the shift amount is at least half the bitwidth, swap the inputs.

+ unsigned HalfVTBits = HalfVT.getScalarSizeInBits();

+ SDValue AndNode = DAG.getNode(ISD::AND, DL, ShAmtVT, ShAmt,

+ DAG.getConstant(HalfVTBits, DL, ShAmtVT));

+ SDValue Cond =

+ DAG.getSetCC(DL, ShAmtCCVT, AndNode, DAG.getConstant(0, DL, ShAmtVT),

+ Opc == ISD::FSHL ? ISD::SETNE : ISD::SETEQ);

+ // Expand to a pair of funnel shifts.

+ EVT NewShAmtVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());

+ SDValue NewShAmt = DAG.getAnyExtOrTrunc(ShAmt, DL, NewShAmtVT);

+ SDValue Select1 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In1, In2);

+ SDValue Select2 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In2, In3);

+ SDValue Select3 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In3, In4);

+ Lo = DAG.getNode(Opc, DL, HalfVT, Select2, Select1, NewShAmt);

+ Hi = DAG.getNode(Opc, DL, HalfVT, Select3, Select2, NewShAmt);

}

void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo,

@@ -4297,7 +4514,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,

if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {

if (RHSLo == RHSHi) {

if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {

- if (RHSCST->isAllOnesValue()) {

+ if (RHSCST->isAllOnes()) {

// Equality comparison to -1.

NewLHS = DAG.getNode(ISD::AND, dl,

LHSLo.getValueType(), LHSLo, LHSHi);

@@ -4317,8 +4534,8 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,

// If this is a comparison of the sign bit, just look at the top part.

// X > -1, x < 0

if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS))

- if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0

- (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1

+ if ((CCCode == ISD::SETLT && CST->isZero()) || // X < 0

+ (CCCode == ISD::SETGT && CST->isAllOnes())) { // X > -1

NewLHS = LHSHi;

NewRHS = RHSHi;

return;

@@ -4369,9 +4586,11 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,

bool EqAllowed = (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||

CCCode == ISD::SETUGE || CCCode == ISD::SETULE);

- if ((EqAllowed && (HiCmpC && HiCmpC->isNullValue())) ||

- (!EqAllowed && ((HiCmpC && (HiCmpC->getAPIntValue() == 1)) ||

- (LoCmpC && LoCmpC->isNullValue())))) {

+ // FIXME: Is the HiCmpC->isOne() here correct for

+ // ZeroOrNegativeOneBooleanContent.

+ if ((EqAllowed && (HiCmpC && HiCmpC->isZero())) ||

+ (!EqAllowed &&

+ ((HiCmpC && HiCmpC->isOne()) || (LoCmpC && LoCmpC->isZero())))) {

// For LE / GE, if high part is known false, ignore the low part.

// For LT / GT: if low part is known false, return the high part.

// if high part is known true, ignore the low part.

@@ -4706,6 +4925,30 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {

SDValue InOp0 = N->getOperand(0);

EVT InVT = InOp0.getValueType();

+ // Try and extract from a smaller type so that it eventually falls

+ // into the promotion code below.

+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector ||

+ getTypeAction(InVT) == TargetLowering::TypeLegal) {

+ EVT NInVT = InVT.getHalfNumVectorElementsVT(*DAG.getContext());

+ unsigned NElts = NInVT.getVectorMinNumElements();

+ uint64_t IdxVal = cast<ConstantSDNode>(BaseIdx)->getZExtValue();

+ SDValue Step1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NInVT, InOp0,

+ DAG.getConstant(alignDown(IdxVal, NElts), dl,

+ BaseIdx.getValueType()));

+ SDValue Step2 = DAG.getNode(

+ ISD::EXTRACT_SUBVECTOR, dl, OutVT, Step1,

+ DAG.getConstant(IdxVal % NElts, dl, BaseIdx.getValueType()));

+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Step2);

+ }

+ // Try and extract from a widened type.

+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {

+ SDValue Ops[] = {GetWidenedVector(InOp0), BaseIdx};

+ SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), OutVT, Ops);

+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Ext);

+ }

// Promote operands and see if this is handled by target lowering,

// Otherwise, use the BUILD_VECTOR approach below

if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) {

@@ -4873,11 +5116,46 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {

EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);

assert(NOutVT.isVector() && "This type must be promoted to a vector type");

+ unsigned NumOperands = N->getNumOperands();

+ unsigned NumOutElem = NOutVT.getVectorMinNumElements();

EVT OutElemTy = NOutVT.getVectorElementType();

+ if (OutVT.isScalableVector()) {

+ // Find the largest promoted element type for each of the operands.

+ SDUse *MaxSizedValue = std::max_element(

+ N->op_begin(), N->op_end(), [](const SDValue &A, const SDValue &B) {

+ EVT AVT = A.getValueType().getVectorElementType();

+ EVT BVT = B.getValueType().getVectorElementType();

+ return AVT.getScalarSizeInBits() < BVT.getScalarSizeInBits();

+ });

+ EVT MaxElementVT = MaxSizedValue->getValueType().getVectorElementType();

+ // Then promote all vectors to the largest element type.

+ SmallVector<SDValue, 8> Ops;

+ for (unsigned I = 0; I < NumOperands; ++I) {

+ SDValue Op = N->getOperand(I);

+ EVT OpVT = Op.getValueType();

+ if (getTypeAction(OpVT) == TargetLowering::TypePromoteInteger)

+ Op = GetPromotedInteger(Op);

+ else

+ assert(getTypeAction(OpVT) == TargetLowering::TypeLegal &&

+ "Unhandled legalization type");

+ if (OpVT.getVectorElementType().getScalarSizeInBits() <

+ MaxElementVT.getScalarSizeInBits())

+ Op = DAG.getAnyExtOrTrunc(Op, dl,

+ OpVT.changeVectorElementType(MaxElementVT));

+ Ops.push_back(Op);

+ }

+ // Do the CONCAT on the promoted type and finally truncate to (the promoted)

+ // NOutVT.

+ return DAG.getAnyExtOrTrunc(

+ DAG.getNode(ISD::CONCAT_VECTORS, dl,

+ OutVT.changeVectorElementType(MaxElementVT), Ops),

+ dl, NOutVT);

+ }

unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements();

- unsigned NumOutElem = NOutVT.getVectorNumElements();

- unsigned NumOperands = N->getNumOperands();

assert(NumElem * NumOperands == NumOutElem &&

"Unexpected number of elements");

@@ -4957,7 +5235,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECREDUCE(SDNode *N) {

// we can simply change the result type.

SDLoc dl(N);

EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));

- return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));

+ return DAG.getNode(N->getOpcode(), dl, NVT, N->ops());

+}

+SDValue DAGTypeLegalizer::PromoteIntRes_VP_REDUCE(SDNode *N) {

+ // The VP_REDUCE result size may be larger than the element size, so we can

+ // simply change the result type. However the start value and result must be

+ // the same.

+ SDLoc DL(N);

+ SDValue Start = PromoteIntOpVectorReduction(N, N->getOperand(0));

+ return DAG.getNode(N->getOpcode(), DL, Start.getValueType(), Start,

+ N->getOperand(1), N->getOperand(2), N->getOperand(3));

}

SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {

@@ -4974,6 +5262,21 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {

return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));

}

+SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_SUBVECTOR(SDNode *N) {

+ SDLoc dl(N);

+ // The result type is equal to the first input operand's type, so the

+ // type that needs promoting must be the second source vector.

+ SDValue V0 = N->getOperand(0);

+ SDValue V1 = GetPromotedInteger(N->getOperand(1));

+ SDValue Idx = N->getOperand(2);

+ EVT PromVT = EVT::getVectorVT(*DAG.getContext(),

+ V1.getValueType().getVectorElementType(),

+ V0.getValueType().getVectorElementCount());

+ V0 = DAG.getAnyExtOrTrunc(V0, dl, PromVT);

+ SDValue Ext = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, PromVT, V0, V1, Idx);

+ return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));

+}

SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) {

SDLoc dl(N);

SDValue V0 = GetPromotedInteger(N->getOperand(0));

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 05a974af3b55..1f73c9eea104 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp

@@ -223,8 +223,7 @@ bool DAGTypeLegalizer::run() {

#endif

PerformExpensiveChecks();

- SDNode *N = Worklist.back();

- Worklist.pop_back();

+ SDNode *N = Worklist.pop_back_val();

assert(N->getNodeId() == ReadyToProcess &&

"Node should be ready if on worklist!");

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 8d17d8fc68b1..da282ecad282 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

@@ -289,6 +289,12 @@ private:

return DAG.getZeroExtendInReg(Op, DL, OldVT);

}

+ // Promote the given operand V (vector or scalar) according to N's specific

+ // reduction kind. N must be an integer VECREDUCE_* or VP_REDUCE_*. Returns

+ // the promoted value.

+ SDValue PromoteIntOpVectorReduction(SDNode *N, SDValue V);

// Integer Result Promotion.

void PromoteIntegerResult(SDNode *N, unsigned ResNo);

SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo);

@@ -332,14 +338,14 @@ private:

SDValue PromoteIntRes_VSELECT(SDNode *N);

SDValue PromoteIntRes_SELECT_CC(SDNode *N);

SDValue PromoteIntRes_SETCC(SDNode *N);

- SDValue PromoteIntRes_SHL(SDNode *N);

- SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);

- SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N);

- SDValue PromoteIntRes_SExtIntBinOp(SDNode *N);

+ SDValue PromoteIntRes_SHL(SDNode *N, bool IsVP);

+ SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N, bool IsVP);

+ SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N, bool IsVP);

+ SDValue PromoteIntRes_SExtIntBinOp(SDNode *N, bool IsVP);

SDValue PromoteIntRes_UMINUMAX(SDNode *N);

SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);

- SDValue PromoteIntRes_SRA(SDNode *N);

- SDValue PromoteIntRes_SRL(SDNode *N);

+ SDValue PromoteIntRes_SRA(SDNode *N, bool IsVP);

+ SDValue PromoteIntRes_SRL(SDNode *N, bool IsVP);

SDValue PromoteIntRes_TRUNCATE(SDNode *N);

SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);

SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);

@@ -353,6 +359,7 @@ private:

SDValue PromoteIntRes_DIVFIX(SDNode *N);

SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);

SDValue PromoteIntRes_VECREDUCE(SDNode *N);

+ SDValue PromoteIntRes_VP_REDUCE(SDNode *N);

SDValue PromoteIntRes_ABS(SDNode *N);

SDValue PromoteIntRes_Rotate(SDNode *N);

SDValue PromoteIntRes_FunnelShift(SDNode *N);

@@ -369,6 +376,7 @@ private:

SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);

SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);

SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);

+ SDValue PromoteIntOp_INSERT_SUBVECTOR(SDNode *N);

SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);

SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);

SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N);

@@ -394,6 +402,7 @@ private:

SDValue PromoteIntOp_FIX(SDNode *N);

SDValue PromoteIntOp_FPOWI(SDNode *N);

SDValue PromoteIntOp_VECREDUCE(SDNode *N);

+ SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo);

SDValue PromoteIntOp_SET_ROUNDING(SDNode *N);

void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);

@@ -518,6 +527,7 @@ private:

SDValue SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC);

SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC);

SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);

+ SDValue SoftenFloatRes_ARITH_FENCE(SDNode *N);

SDValue SoftenFloatRes_BITCAST(SDNode *N);

SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);

SDValue SoftenFloatRes_ConstantFP(SDNode *N);

@@ -816,7 +826,7 @@ private:

// Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.

void SplitVectorResult(SDNode *N, unsigned ResNo);

- void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);

+ void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi, bool IsVP);

void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);

void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);

void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);

@@ -898,6 +908,7 @@ private:

SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);

SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N);

SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);

+ SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);

SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);

SDValue WidenVecRes_LOAD(SDNode* N);

SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);

@@ -912,7 +923,7 @@ private:

SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);

SDValue WidenVecRes_Ternary(SDNode *N);

- SDValue WidenVecRes_Binary(SDNode *N);

+ SDValue WidenVecRes_Binary(SDNode *N, bool IsVP);

SDValue WidenVecRes_BinaryCanTrap(SDNode *N);

SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N);

SDValue WidenVecRes_StrictFP(SDNode *N);

@@ -972,10 +983,10 @@ private:

LoadSDNode *LD, ISD::LoadExtType ExtType);

/// Helper function to generate a set of stores to store a widen vector into

- /// non-widen memory.

+ /// non-widen memory. Returns true if successful, false otherwise.

/// StChain: list of chains for the stores we have generated

/// ST: store of a widen value

- void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);

+ bool GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);

/// Modifies a vector input (widen or narrows) to a vector of NVT. The

/// input vector must have the same element type as NVT.

@@ -1011,6 +1022,7 @@ private:

// Generic Result Splitting.

void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,

SDValue &Lo, SDValue &Hi);

+ void SplitRes_ARITH_FENCE (SDNode *N, SDValue &Lo, SDValue &Hi);

void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);

void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);

void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 81cc2bf10d25..3d3c9a2ad837 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp

@@ -571,3 +571,13 @@ void DAGTypeLegalizer::SplitRes_FREEZE(SDNode *N, SDValue &Lo, SDValue &Hi) {

Lo = DAG.getNode(ISD::FREEZE, dl, L.getValueType(), L);

Hi = DAG.getNode(ISD::FREEZE, dl, H.getValueType(), H);

}

+void DAGTypeLegalizer::SplitRes_ARITH_FENCE(SDNode *N, SDValue &Lo,

+ SDValue &Hi) {

+ SDValue L, H;

+ SDLoc DL(N);

+ GetSplitOp(N->getOperand(0), L, H);

+ Lo = DAG.getNode(ISD::ARITH_FENCE, DL, L.getValueType(), L);

+ Hi = DAG.getNode(ISD::ARITH_FENCE, DL, H.getValueType(), H);

+}

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index ebe3bfc4b75a..88a28a3be53e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

@@ -538,8 +538,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {

return RecursivelyLegalizeResults(Op, ResultVals);

}

-// FIXME: This is very similar to the X86 override of

-// TargetLowering::LowerOperationWrapper. Can we merge them somehow?

+// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we

+// merge them somehow?

bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,

SmallVectorImpl<SDValue> &Results) {

SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);

@@ -774,8 +774,8 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {

ExpandSETCC(Node, Results);

return;

case ISD::ABS:

- if (TLI.expandABS(Node, Tmp, DAG)) {

- Results.push_back(Tmp);

+ if (SDValue Expanded = TLI.expandABS(Node, DAG)) {

+ Results.push_back(Expanded);

return;

}

break;

@@ -783,22 +783,22 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {

ExpandBITREVERSE(Node, Results);

return;

case ISD::CTPOP:

- if (TLI.expandCTPOP(Node, Tmp, DAG)) {

- Results.push_back(Tmp);

+ if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) {

+ Results.push_back(Expanded);

return;

}

break;

case ISD::CTLZ:

case ISD::CTLZ_ZERO_UNDEF:

- if (TLI.expandCTLZ(Node, Tmp, DAG)) {

- Results.push_back(Tmp);

+ if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) {

+ Results.push_back(Expanded);

return;

}

break;

case ISD::CTTZ:

case ISD::CTTZ_ZERO_UNDEF:

- if (TLI.expandCTTZ(Node, Tmp, DAG)) {

- Results.push_back(Tmp);

+ if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) {

+ Results.push_back(Expanded);

return;

}

break;

@@ -943,10 +943,8 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {

// What is the size of each element in the vector mask.

EVT BitTy = MaskTy.getScalarType();

- Mask = DAG.getSelect(DL, BitTy, Mask,

- DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL,

- BitTy),

- DAG.getConstant(0, DL, BitTy));

+ Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getAllOnesConstant(DL, BitTy),

+ DAG.getConstant(0, DL, BitTy));

// Broadcast the mask so that the entire vector is all one or all zero.

if (VT.isFixedLengthVector())

@@ -960,9 +958,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {

Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);

Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);

- SDValue AllOnes = DAG.getConstant(

- APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy);

- SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);

+ SDValue NotMask = DAG.getNOT(DL, Mask, MaskTy);

Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);

Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);

@@ -1099,25 +1095,45 @@ static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {

SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {

EVT VT = Node->getValueType(0);

+ // Scalable vectors can't use shuffle expansion.

+ if (VT.isScalableVector())

+ return TLI.expandBSWAP(Node, DAG);

// Generate a byte wise shuffle mask for the BSWAP.

SmallVector<int, 16> ShuffleMask;

createBSWAPShuffleMask(VT, ShuffleMask);

EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());

// Only emit a shuffle if the mask is legal.

- if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))

- return DAG.UnrollVectorOp(Node);

+ if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {

+ SDLoc DL(Node);

+ SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));

+ Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);

+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);

+ }

- SDLoc DL(Node);

- SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));

- Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);

- return DAG.getNode(ISD::BITCAST, DL, VT, Op);

+ // If we have the appropriate vector bit operations, it is better to use them

+ // than unrolling and expanding each component.

+ if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&

+ TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&

+ TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&

+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))

+ return TLI.expandBSWAP(Node, DAG);

+ // Otherwise unroll.

+ return DAG.UnrollVectorOp(Node);

}

void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,

SmallVectorImpl<SDValue> &Results) {

EVT VT = Node->getValueType(0);

+ // We can't unroll or use shuffles for scalable vectors.

+ if (VT.isScalableVector()) {

+ Results.push_back(TLI.expandBITREVERSE(Node, DAG));

+ return;

+ }

// If we have the scalar operation, it's probably cheaper to unroll it.

if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) {

SDValue Tmp = DAG.UnrollVectorOp(Node);

@@ -1156,9 +1172,10 @@ void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,

if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&

TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&

TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&

- TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))

- // Let LegalizeDAG handle this later.

+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) {

+ Results.push_back(TLI.expandBITREVERSE(Node, DAG));

return;

+ }

// Otherwise unroll.

SDValue Tmp = DAG.UnrollVectorOp(Node);

@@ -1207,9 +1224,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {

Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);

Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);

- SDValue AllOnes = DAG.getConstant(

- APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT);

- SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);

+ SDValue NotMask = DAG.getNOT(DL, Mask, VT);

Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);

Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);

@@ -1502,9 +1517,8 @@ void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,

if (Node->getOpcode() == ISD::STRICT_FSETCC ||

Node->getOpcode() == ISD::STRICT_FSETCCS)

ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult,

- DAG.getConstant(APInt::getAllOnesValue

- (EltVT.getSizeInBits()), dl, EltVT),

- DAG.getConstant(0, dl, EltVT));

+ DAG.getAllOnesConstant(dl, EltVT),

+ DAG.getConstant(0, dl, EltVT));

OpValues.push_back(ScalarResult);

OpChains.push_back(ScalarChain);

@@ -1536,9 +1550,7 @@ SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {

TLI.getSetCCResultType(DAG.getDataLayout(),

*DAG.getContext(), TmpEltVT),

LHSElem, RHSElem, CC);

- Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],

- DAG.getConstant(APInt::getAllOnesValue

- (EltVT.getSizeInBits()), dl, EltVT),

+ Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getAllOnesConstant(dl, EltVT),

DAG.getConstant(0, dl, EltVT));

}

return DAG.getBuildVector(VT, dl, Ops);

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 91242bbf866f..539c9cb9c256 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

@@ -529,7 +529,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {

SDValue Arg = N->getOperand(2).getOperand(0);

if (Arg.isUndef())

return DAG.getUNDEF(N->getValueType(0).getVectorElementType());

- unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue();

+ unsigned Op = !cast<ConstantSDNode>(Arg)->isZero();

return GetScalarizedVector(N->getOperand(Op));

}

@@ -1045,7 +1045,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {

case ISD::USHLSAT:

case ISD::ROTL:

case ISD::ROTR:

- SplitVecRes_BinOp(N, Lo, Hi);

+ SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ false);

break;

case ISD::FMA:

case ISD::FSHL:

@@ -1082,6 +1082,26 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {

case ISD::UDIVFIXSAT:

SplitVecRes_FIX(N, Lo, Hi);

break;

+ case ISD::VP_ADD:

+ case ISD::VP_AND:

+ case ISD::VP_MUL:

+ case ISD::VP_OR:

+ case ISD::VP_SUB:

+ case ISD::VP_XOR:

+ case ISD::VP_SHL:

+ case ISD::VP_LSHR:

+ case ISD::VP_ASHR:

+ case ISD::VP_SDIV:

+ case ISD::VP_UDIV:

+ case ISD::VP_SREM:

+ case ISD::VP_UREM:

+ case ISD::VP_FADD:

+ case ISD::VP_FSUB:

+ case ISD::VP_FMUL:

+ case ISD::VP_FDIV:

+ case ISD::VP_FREM:

+ SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ true);

+ break;

}

// If Lo/Hi is null, the sub-method took care of registering results etc.

@@ -1113,8 +1133,8 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,

}

-void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,

- SDValue &Hi) {

+void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi,

+ bool IsVP) {

SDValue LHSLo, LHSHi;

GetSplitVector(N->getOperand(0), LHSLo, LHSHi);

SDValue RHSLo, RHSHi;

@@ -1123,8 +1143,41 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,

const SDNodeFlags Flags = N->getFlags();

unsigned Opcode = N->getOpcode();

- Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);

- Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);

+ if (!IsVP) {

+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);

+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);

+ return;

+ }

+ // Split the mask.

+ SDValue MaskLo, MaskHi;

+ SDValue Mask = N->getOperand(2);

+ EVT MaskVT = Mask.getValueType();

+ if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector)

+ GetSplitVector(Mask, MaskLo, MaskHi);

+ else

+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, SDLoc(Mask));

+ // Split the vector length parameter.

+ // %evl -> umin(%evl, %halfnumelts) and usubsat(%evl - %halfnumelts).

+ SDValue EVL = N->getOperand(3);

+ EVT VecVT = N->getValueType(0);

+ EVT EVLVT = EVL.getValueType();

+ assert(VecVT.getVectorElementCount().isKnownEven() &&

+ "Expecting the mask to be an evenly-sized vector");

+ unsigned HalfMinNumElts = VecVT.getVectorMinNumElements() / 2;

+ SDValue HalfNumElts =

+ VecVT.isFixedLengthVector()

+ ? DAG.getConstant(HalfMinNumElts, dl, EVLVT)

+ : DAG.getVScale(dl, EVLVT,

+ APInt(EVLVT.getScalarSizeInBits(), HalfMinNumElts));

+ SDValue EVLLo = DAG.getNode(ISD::UMIN, dl, EVLVT, EVL, HalfNumElts);

+ SDValue EVLHi = DAG.getNode(ISD::USUBSAT, dl, EVLVT, EVL, HalfNumElts);

+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(),

+ {LHSLo, RHSLo, MaskLo, EVLLo}, Flags);

+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(),

+ {LHSHi, RHSHi, MaskHi, EVLHi}, Flags);

}

void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,

@@ -2985,6 +3038,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {

case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;

case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;

case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;

+ case ISD::INSERT_SUBVECTOR:

+ Res = WidenVecRes_INSERT_SUBVECTOR(N);

+ break;

case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;

case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;

case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;

@@ -3035,7 +3091,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {

case ISD::USHLSAT:

case ISD::ROTL:

case ISD::ROTR:

- Res = WidenVecRes_Binary(N);

+ Res = WidenVecRes_Binary(N, /*IsVP*/ false);

break;

case ISD::FADD:

@@ -3159,6 +3215,31 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {

case ISD::FSHR:

Res = WidenVecRes_Ternary(N);

break;

+ case ISD::VP_ADD:

+ case ISD::VP_AND:

+ case ISD::VP_MUL:

+ case ISD::VP_OR:

+ case ISD::VP_SUB:

+ case ISD::VP_XOR:

+ case ISD::VP_SHL:

+ case ISD::VP_LSHR:

+ case ISD::VP_ASHR:

+ case ISD::VP_SDIV:

+ case ISD::VP_UDIV:

+ case ISD::VP_SREM:

+ case ISD::VP_UREM:

+ case ISD::VP_FADD:

+ case ISD::VP_FSUB:

+ case ISD::VP_FMUL:

+ case ISD::VP_FDIV:

+ case ISD::VP_FREM:

+ // Vector-predicated binary op widening. Note that -- unlike the

+ // unpredicated versions -- we don't have to worry about trapping on

+ // operations like UDIV, FADD, etc., as we pass on the original vector

+ // length parameter. This means the widened elements containing garbage

+ // aren't active.

+ Res = WidenVecRes_Binary(N, /*IsVP*/ true);

+ break;

}

// If Res is null, the sub-method took care of registering the result.

@@ -3176,13 +3257,31 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {

return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);

}

-SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {

+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N, bool IsVP) {

// Binary op widening.

SDLoc dl(N);

EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));

SDValue InOp1 = GetWidenedVector(N->getOperand(0));

SDValue InOp2 = GetWidenedVector(N->getOperand(1));

- return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags());

+ if (!IsVP)

+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2,

+ N->getFlags());

+ // For VP operations, we must also widen the mask. Note that the mask type

+ // may not actually need widening, leading it to be split along with the VP

+ // operation.

+ // FIXME: This could lead to an infinite split/widen loop. We only handle the

+ // case where the mask needs widening to an identically-sized type as the

+ // vector inputs.

+ SDValue Mask = N->getOperand(2);

+ assert(getTypeAction(Mask.getValueType()) ==

+ TargetLowering::TypeWidenVector &&

+ "Unable to widen binary VP op");

+ Mask = GetWidenedVector(Mask);

+ assert(Mask.getValueType().getVectorElementCount() ==

+ WidenVT.getVectorElementCount() &&

+ "Unable to widen binary VP op");

+ return DAG.getNode(N->getOpcode(), dl, WidenVT,

+ {InOp1, InOp2, Mask, N->getOperand(3)}, N->getFlags());

}

SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) {

@@ -3527,7 +3626,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {

SDLoc DL(N);

EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0));

- unsigned WidenNumElts = WidenVT.getVectorNumElements();

+ ElementCount WidenEC = WidenVT.getVectorElementCount();

EVT InVT = InOp.getValueType();

@@ -3547,14 +3646,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {

}

EVT InEltVT = InVT.getVectorElementType();

- EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenNumElts);

- unsigned InVTNumElts = InVT.getVectorNumElements();

+ EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenEC);

+ ElementCount InVTEC = InVT.getVectorElementCount();

if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {

InOp = GetWidenedVector(N->getOperand(0));

InVT = InOp.getValueType();

- InVTNumElts = InVT.getVectorNumElements();

- if (InVTNumElts == WidenNumElts) {

+ InVTEC = InVT.getVectorElementCount();

+ if (InVTEC == WidenEC) {

if (N->getNumOperands() == 1)

return DAG.getNode(Opcode, DL, WidenVT, InOp);

return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);

@@ -3578,9 +3677,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {

// it an illegal type that might lead to repeatedly splitting the input

// and then widening it. To avoid this, we widen the input only if

// it results in a legal type.

- if (WidenNumElts % InVTNumElts == 0) {

+ if (WidenEC.isKnownMultipleOf(InVTEC.getKnownMinValue())) {

// Widen the input and call convert on the widened input vector.

- unsigned NumConcat = WidenNumElts/InVTNumElts;

+ unsigned NumConcat =

+ WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue();

SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));

Ops[0] = InOp;

SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);

@@ -3589,7 +3689,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {

return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);

}

- if (InVTNumElts % WidenNumElts == 0) {

+ if (InVTEC.isKnownMultipleOf(WidenEC.getKnownMinValue())) {

SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp,

DAG.getVectorIdxConstant(0, DL));

// Extract the input and convert the shorten input vector.

@@ -3601,7 +3701,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {

// Otherwise unroll into some nasty scalar code and rebuild the vector.

EVT EltVT = WidenVT.getVectorElementType();

- SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));

+ SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT));

// Use the original element count so we don't do more scalar opts than

// necessary.

unsigned MinElts = N->getValueType(0).getVectorNumElements();

@@ -3962,14 +4062,26 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {

return DAG.getBuildVector(WidenVT, dl, Ops);

}

+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_SUBVECTOR(SDNode *N) {

+ EVT VT = N->getValueType(0);

+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);

+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));

+ SDValue InOp2 = N->getOperand(1);

+ SDValue Idx = N->getOperand(2);

+ SDLoc dl(N);

+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WidenVT, InOp1, InOp2, Idx);

+}

SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {

EVT VT = N->getValueType(0);

+ EVT EltVT = VT.getVectorElementType();

EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);

SDValue InOp = N->getOperand(0);

SDValue Idx = N->getOperand(1);

SDLoc dl(N);

- if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)

+ auto InOpTypeAction = getTypeAction(InOp.getValueType());

+ if (InOpTypeAction == TargetLowering::TypeWidenVector)

InOp = GetWidenedVector(InOp);

EVT InVT = InOp.getValueType();

@@ -3979,20 +4091,49 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {

if (IdxVal == 0 && InVT == WidenVT)

return InOp;

- if (VT.isScalableVector())

- report_fatal_error("Don't know how to widen the result of "

- "EXTRACT_SUBVECTOR for scalable vectors");

// Check if we can extract from the vector.

- unsigned WidenNumElts = WidenVT.getVectorNumElements();

- unsigned InNumElts = InVT.getVectorNumElements();

+ unsigned WidenNumElts = WidenVT.getVectorMinNumElements();

+ unsigned InNumElts = InVT.getVectorMinNumElements();

if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)

return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);

+ if (VT.isScalableVector()) {

+ // Try to split the operation up into smaller extracts and concat the

+ // results together, e.g.

+ // nxv6i64 extract_subvector(nxv12i64, 6)

+ // <->

+ // nxv8i64 concat(

+ // nxv2i64 extract_subvector(nxv16i64, 6)

+ // nxv2i64 extract_subvector(nxv16i64, 8)

+ // nxv2i64 extract_subvector(nxv16i64, 10)

+ // undef)

+ unsigned VTNElts = VT.getVectorMinNumElements();

+ unsigned GCD = greatestCommonDivisor(VTNElts, WidenNumElts);

+ assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken "

+ "down type's element count");

+ EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT,

+ ElementCount::getScalable(GCD));

+ // Avoid recursion around e.g. nxv1i8.

+ if (getTypeAction(PartVT) != TargetLowering::TypeWidenVector) {

+ SmallVector<SDValue> Parts;

+ unsigned I = 0;

+ for (; I < VTNElts / GCD; ++I)

+ Parts.push_back(

+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, InOp,

+ DAG.getVectorIdxConstant(IdxVal + I * GCD, dl)));

+ for (; I < WidenNumElts / GCD; ++I)

+ Parts.push_back(DAG.getUNDEF(PartVT));

+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);

+ }

+ report_fatal_error("Don't know how to widen the result of "

+ "EXTRACT_SUBVECTOR for scalable vectors");

+ }

// We could try widening the input to the right length but for now, extract

// the original elements, fill the rest with undefs and build a vector.

SmallVector<SDValue, 16> Ops(WidenNumElts);

- EVT EltVT = VT.getVectorElementType();

unsigned NumElts = VT.getVectorNumElements();

unsigned i;

for (i = 0; i < NumElts; ++i)

@@ -4037,20 +4178,55 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {

else

Result = GenWidenVectorLoads(LdChain, LD);

- // If we generate a single load, we can use that for the chain. Otherwise,

- // build a factor node to remember the multiple loads are independent and

- // chain to that.

- SDValue NewChain;

- if (LdChain.size() == 1)

- NewChain = LdChain[0];

- else

- NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);

+ if (Result) {

+ // If we generate a single load, we can use that for the chain. Otherwise,

+ // build a factor node to remember the multiple loads are independent and

+ // chain to that.

+ SDValue NewChain;

+ if (LdChain.size() == 1)

+ NewChain = LdChain[0];

+ else

+ NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);

- // Modified the chain - switch anything that used the old chain to use

- // the new one.

- ReplaceValueWith(SDValue(N, 1), NewChain);

+ // Modified the chain - switch anything that used the old chain to use

+ // the new one.

+ ReplaceValueWith(SDValue(N, 1), NewChain);

- return Result;

+ return Result;

+ }

+ // Generate a vector-predicated load if it is custom/legal on the target. To

+ // avoid possible recursion, only do this if the widened mask type is legal.

+ // FIXME: Not all targets may support EVL in VP_LOAD. These will have been

+ // removed from the IR by the ExpandVectorPredication pass but we're

+ // reintroducing them here.

+ EVT LdVT = LD->getMemoryVT();

+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), LdVT);

+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,

+ WideVT.getVectorElementCount());

+ if (ExtType == ISD::NON_EXTLOAD && WideVT.isScalableVector() &&

+ TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WideVT) &&

+ TLI.isTypeLegal(WideMaskVT)) {

+ SDLoc DL(N);

+ SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);

+ MVT EVLVT = TLI.getVPExplicitVectorLengthTy();

+ unsigned NumVTElts = LdVT.getVectorMinNumElements();

+ SDValue EVL =

+ DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));

+ const auto *MMO = LD->getMemOperand();

+ SDValue NewLoad =

+ DAG.getLoadVP(WideVT, DL, LD->getChain(), LD->getBasePtr(), Mask, EVL,

+ MMO->getPointerInfo(), MMO->getAlign(), MMO->getFlags(),

+ MMO->getAAInfo());

+ // Modified the chain - switch anything that used the old chain to use

+ // the new one.

+ ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1));

+ return NewLoad;

+ }

+ report_fatal_error("Unable to widen vector load");

}

SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {

@@ -4351,7 +4527,7 @@ SDValue DAGTypeLegalizer::WidenVSELECTMask(SDNode *N) {

SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {

EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));

- unsigned WidenNumElts = WidenVT.getVectorNumElements();

+ ElementCount WidenEC = WidenVT.getVectorElementCount();

SDValue Cond1 = N->getOperand(0);

EVT CondVT = Cond1.getValueType();

@@ -4365,8 +4541,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {

}

EVT CondEltVT = CondVT.getVectorElementType();

- EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),

- CondEltVT, WidenNumElts);

+ EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenEC);

if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)

Cond1 = GetWidenedVector(Cond1);

@@ -4891,12 +5066,42 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {

return TLI.scalarizeVectorStore(ST, DAG);

SmallVector<SDValue, 16> StChain;

- GenWidenVectorStores(StChain, ST);

+ if (GenWidenVectorStores(StChain, ST)) {

+ if (StChain.size() == 1)

+ return StChain[0];

- if (StChain.size() == 1)

- return StChain[0];

- else

return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);

+ }

+ // Generate a vector-predicated store if it is custom/legal on the target.

+ // To avoid possible recursion, only do this if the widened mask type is

+ // legal.

+ // FIXME: Not all targets may support EVL in VP_STORE. These will have been

+ // removed from the IR by the ExpandVectorPredication pass but we're

+ // reintroducing them here.

+ SDValue StVal = ST->getValue();

+ EVT StVT = StVal.getValueType();

+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StVT);

+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,

+ WideVT.getVectorElementCount());

+ if (WideVT.isScalableVector() &&

+ TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&

+ TLI.isTypeLegal(WideMaskVT)) {

+ // Widen the value.

+ SDLoc DL(N);

+ StVal = GetWidenedVector(StVal);

+ SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);

+ MVT EVLVT = TLI.getVPExplicitVectorLengthTy();

+ unsigned NumVTElts = StVT.getVectorMinNumElements();

+ SDValue EVL =

+ DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));

+ const auto *MMO = ST->getMemOperand();

+ return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(), Mask,

+ EVL, MMO->getPointerInfo(), MMO->getAlign(),

+ MMO->getFlags(), MMO->getAAInfo());

+ }

+ report_fatal_error("Unable to widen vector store");

}

SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {

@@ -5147,9 +5352,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {

// Align: If 0, don't allow use of a wider type

// WidenEx: If Align is not 0, the amount additional we can load/store from.

-static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,

- unsigned Width, EVT WidenVT,

- unsigned Align = 0, unsigned WidenEx = 0) {

+static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI,

+ unsigned Width, EVT WidenVT,

+ unsigned Align = 0, unsigned WidenEx = 0) {

EVT WidenEltVT = WidenVT.getVectorElementType();

const bool Scalable = WidenVT.isScalableVector();

unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinSize();

@@ -5204,9 +5409,11 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,

}

+ // Using element-wise loads and stores for widening operations is not

+ // supported for scalable vectors

if (Scalable)

- report_fatal_error("Using element-wise loads and stores for widening "

- "operations is not supported for scalable vectors");

+ return None;

return RetVT;

}

@@ -5266,32 +5473,63 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,

TypeSize WidthDiff = WidenWidth - LdWidth;

// Allow wider loads if they are sufficiently aligned to avoid memory faults

// and if the original load is simple.

- unsigned LdAlign = (!LD->isSimple()) ? 0 : LD->getAlignment();

+ unsigned LdAlign =

+ (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlignment();

// Find the vector type that can load from.

- EVT NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,

- WidthDiff.getKnownMinSize());

- TypeSize NewVTWidth = NewVT.getSizeInBits();

- SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),

+ Optional<EVT> FirstVT =

+ findMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,

+ WidthDiff.getKnownMinSize());

+ if (!FirstVT)

+ return SDValue();

+ SmallVector<EVT, 8> MemVTs;

+ TypeSize FirstVTWidth = FirstVT->getSizeInBits();

+ // Unless we're able to load in one instruction we must work out how to load

+ // the remainder.

+ if (!TypeSize::isKnownLE(LdWidth, FirstVTWidth)) {

+ Optional<EVT> NewVT = FirstVT;

+ TypeSize RemainingWidth = LdWidth;

+ TypeSize NewVTWidth = FirstVTWidth;

+ do {

+ RemainingWidth -= NewVTWidth;

+ if (TypeSize::isKnownLT(RemainingWidth, NewVTWidth)) {

+ // The current type we are using is too large. Find a better size.

+ NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinSize(), WidenVT,

+ LdAlign, WidthDiff.getKnownMinSize());

+ if (!NewVT)

+ return SDValue();

+ NewVTWidth = NewVT->getSizeInBits();

+ }

+ MemVTs.push_back(*NewVT);

+ } while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth));

+ }

+ SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),

LD->getOriginalAlign(), MMOFlags, AAInfo);

LdChain.push_back(LdOp.getValue(1));

// Check if we can load the element with one instruction.

- if (TypeSize::isKnownLE(LdWidth, NewVTWidth)) {

- if (!NewVT.isVector()) {

- unsigned NumElts = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize();

- EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);

+ if (MemVTs.empty()) {

+ assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));

+ if (!FirstVT->isVector()) {

+ unsigned NumElts =

+ WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize();

+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts);

SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);

return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);

}

- if (NewVT == WidenVT)

+ if (FirstVT == WidenVT)

return LdOp;

// TODO: We don't currently have any tests that exercise this code path.

- assert(WidenWidth.getFixedSize() % NewVTWidth.getFixedSize() == 0);

- unsigned NumConcat = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize();

+ assert(WidenWidth.getFixedSize() % FirstVTWidth.getFixedSize() == 0);

+ unsigned NumConcat =

+ WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize();

SmallVector<SDValue, 16> ConcatOps(NumConcat);

- SDValue UndefVal = DAG.getUNDEF(NewVT);

+ SDValue UndefVal = DAG.getUNDEF(*FirstVT);

ConcatOps[0] = LdOp;

for (unsigned i = 1; i != NumConcat; ++i)

ConcatOps[i] = UndefVal;

@@ -5304,28 +5542,22 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,

uint64_t ScaledOffset = 0;

MachinePointerInfo MPI = LD->getPointerInfo();

- do {

- LdWidth -= NewVTWidth;

- IncrementPointer(cast<LoadSDNode>(LdOp), NewVT, MPI, BasePtr,

- &ScaledOffset);

- if (TypeSize::isKnownLT(LdWidth, NewVTWidth)) {

- // The current type we are using is too large. Find a better size.

- NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,

- WidthDiff.getKnownMinSize());

- NewVTWidth = NewVT.getSizeInBits();

- }

+ // First increment past the first load.

+ IncrementPointer(cast<LoadSDNode>(LdOp), *FirstVT, MPI, BasePtr,

+ &ScaledOffset);

+ for (EVT MemVT : MemVTs) {

Align NewAlign = ScaledOffset == 0

? LD->getOriginalAlign()

: commonAlignment(LD->getAlign(), ScaledOffset);

SDValue L =

- DAG.getLoad(NewVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo);

- LdChain.push_back(L.getValue(1));

+ DAG.getLoad(MemVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo);

LdOps.push_back(L);

- LdOp = L;

- } while (TypeSize::isKnownGT(LdWidth, NewVTWidth));

+ LdChain.push_back(L.getValue(1));

+ IncrementPointer(cast<LoadSDNode>(L), MemVT, MPI, BasePtr, &ScaledOffset);

+ }

// Build the vector from the load operations.

unsigned End = LdOps.size();

@@ -5447,7 +5679,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,

return DAG.getBuildVector(WidenVT, dl, Ops);

}

-void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,

+bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,

StoreSDNode *ST) {

// The strategy assumes that we can efficiently store power-of-two widths.

// The routine chops the vector into the largest vector stores with the same

@@ -5473,9 +5705,30 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,

MachinePointerInfo MPI = ST->getPointerInfo();

uint64_t ScaledOffset = 0;

+ // A breakdown of how to widen this vector store. Each element of the vector

+ // is a memory VT combined with the number of times it is to be stored to,

+ // e.g., v5i32 -> {{v2i32,2},{i32,1}}

+ SmallVector<std::pair<EVT, unsigned>, 4> MemVTs;

while (StWidth.isNonZero()) {

// Find the largest vector type we can store with.

- EVT NewVT = FindMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT);

+ Optional<EVT> NewVT =

+ findMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT);

+ if (!NewVT)

+ return false;

+ MemVTs.push_back({*NewVT, 0});

+ TypeSize NewVTWidth = NewVT->getSizeInBits();

+ do {

+ StWidth -= NewVTWidth;

+ MemVTs.back().second++;

+ } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));

+ }

+ for (const auto &Pair : MemVTs) {

+ EVT NewVT = Pair.first;

+ unsigned Count = Pair.second;

TypeSize NewVTWidth = NewVT.getSizeInBits();

if (NewVT.isVector()) {

@@ -5490,12 +5743,10 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,

MMOFlags, AAInfo);

StChain.push_back(PartStore);

- StWidth -= NewVTWidth;

Idx += NumVTElts;

IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr,

&ScaledOffset);

- } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));

+ } while (--Count);

} else {

// Cast the vector to the scalar type we can store.

unsigned NumElts = ValWidth.getFixedSize() / NewVTWidth.getFixedSize();

@@ -5511,13 +5762,14 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,

MMOFlags, AAInfo);

StChain.push_back(PartStore);

- StWidth -= NewVTWidth;

IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr);

- } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));

+ } while (--Count);

// Restore index back to be relative to the original widen element type.

Idx = Idx * NewVTWidth.getFixedSize() / ValEltWidth;

}

+ return true;

}

/// Modifies a vector input (widen or narrows) to a vector of NVT. The

diff --git a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 75b4242a415c..f64b332a7fef 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h

@@ -192,7 +192,7 @@ public:

// Returns the SDNodes which this SDDbgValue depends on.

SmallVector<SDNode *> getSDNodes() const {

SmallVector<SDNode *> Dependencies;

- for (SDDbgOperand DbgOp : getLocationOps())

+ for (const SDDbgOperand &DbgOp : getLocationOps())

if (DbgOp.getKind() == SDDbgOperand::SDNODE)

Dependencies.push_back(DbgOp.getSDNode());

for (SDNode *Node : getAdditionalDependencies())

diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 0022e5ec31f0..1b89864116cb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp

@@ -56,9 +56,7 @@ namespace {

SUnit *pop() {

if (empty()) return nullptr;

- SUnit *V = Queue.back();

- Queue.pop_back();

- return V;

+ return Queue.pop_back_val();

}

};

diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index b2a8c8bdd78c..95f7e43b151d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp

@@ -384,13 +384,12 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {

// There are either zero or one users of the Glue result.

bool HasGlueUse = false;

- for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();

- UI != E; ++UI)

- if (GlueVal.isOperandOf(*UI)) {

+ for (SDNode *U : N->uses())

+ if (GlueVal.isOperandOf(U)) {

HasGlueUse = true;

assert(N->getNodeId() == -1 && "Node already inserted!");

N->setNodeId(NodeSUnit->NodeNum);

- N = *UI;

+ N = U;

if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())

NodeSUnit->isCall = true;

break;

@@ -742,7 +741,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,

/// Returns true if \p DV has any VReg operand locations which don't exist in

/// VRBaseMap.

auto HasUnknownVReg = [&VRBaseMap](SDDbgValue *DV) {

- for (SDDbgOperand L : DV->getLocationOps()) {

+ for (const SDDbgOperand &L : DV->getLocationOps()) {

if (L.getKind() == SDDbgOperand::SDNODE &&

VRBaseMap.count({L.getSDNode(), L.getResNo()}) == 0)

return true;

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 2a98464425c4..008665d50233 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

@@ -28,6 +28,7 @@

#include "llvm/Analysis/MemoryLocation.h"

#include "llvm/Analysis/ProfileSummaryInfo.h"

#include "llvm/Analysis/ValueTracking.h"

+#include "llvm/CodeGen/Analysis.h"

#include "llvm/CodeGen/FunctionLoweringInfo.h"

#include "llvm/CodeGen/ISDOpcodes.h"

#include "llvm/CodeGen/MachineBasicBlock.h"

@@ -175,7 +176,7 @@ bool ISD::isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly) {

if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {

APInt SplatVal;

- return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnesValue();

+ return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnes();

}

if (N->getOpcode() != ISD::BUILD_VECTOR) return false;

@@ -224,7 +225,7 @@ bool ISD::isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly) {

if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {

APInt SplatVal;

- return isConstantSplatVector(N, SplatVal) && SplatVal.isNullValue();

+ return isConstantSplatVector(N, SplatVal) && SplatVal.isZero();

}

if (N->getOpcode() != ISD::BUILD_VECTOR) return false;

@@ -412,6 +413,28 @@ bool ISD::isVPOpcode(unsigned Opcode) {

}

+bool ISD::isVPBinaryOp(unsigned Opcode) {

+ switch (Opcode) {

+ default:

+ return false;

+#define PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \

+ case ISD::SDOPC: \

+ return true;

+#include "llvm/IR/VPIntrinsics.def"

+ }

+}

+bool ISD::isVPReduction(unsigned Opcode) {

+ switch (Opcode) {

+ default:

+ return false;

+#define PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \

+ case ISD::SDOPC: \

+ return true;

+#include "llvm/IR/VPIntrinsics.def"

+ }

+}

/// The operand position of the vector mask.

Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {

switch (Opcode) {

@@ -683,6 +706,34 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {

ID.AddInteger(ST->getPointerInfo().getAddrSpace());

break;

}

+ case ISD::VP_LOAD: {

+ const VPLoadSDNode *ELD = cast<VPLoadSDNode>(N);

+ ID.AddInteger(ELD->getMemoryVT().getRawBits());

+ ID.AddInteger(ELD->getRawSubclassData());

+ ID.AddInteger(ELD->getPointerInfo().getAddrSpace());

+ break;

+ }

+ case ISD::VP_STORE: {

+ const VPStoreSDNode *EST = cast<VPStoreSDNode>(N);

+ ID.AddInteger(EST->getMemoryVT().getRawBits());

+ ID.AddInteger(EST->getRawSubclassData());

+ ID.AddInteger(EST->getPointerInfo().getAddrSpace());

+ break;

+ }

+ case ISD::VP_GATHER: {

+ const VPGatherSDNode *EG = cast<VPGatherSDNode>(N);

+ ID.AddInteger(EG->getMemoryVT().getRawBits());

+ ID.AddInteger(EG->getRawSubclassData());

+ ID.AddInteger(EG->getPointerInfo().getAddrSpace());

+ break;

+ }

+ case ISD::VP_SCATTER: {

+ const VPScatterSDNode *ES = cast<VPScatterSDNode>(N);

+ ID.AddInteger(ES->getMemoryVT().getRawBits());

+ ID.AddInteger(ES->getRawSubclassData());

+ ID.AddInteger(ES->getPointerInfo().getAddrSpace());

+ break;

+ }

case ISD::MLOAD: {

const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);

ID.AddInteger(MLD->getMemoryVT().getRawBits());

@@ -1319,10 +1370,7 @@ SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {

/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).

SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {

- EVT EltVT = VT.getScalarType();

- SDValue NegOne =

- getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT);

- return getNode(ISD::XOR, DL, VT, Val, NegOne);

+ return getNode(ISD::XOR, DL, VT, Val, getAllOnesConstant(DL, VT));

}

SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) {

@@ -1901,7 +1949,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,

if (SameNumElts)

return N1;

if (auto *C = dyn_cast<ConstantSDNode>(Splat))

- if (C->isNullValue())

+ if (C->isZero())

return N1;

}

@@ -2265,19 +2313,8 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,

if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {

const APInt &C1 = N1C->getAPIntValue();

- switch (Cond) {

- default: llvm_unreachable("Unknown integer setcc!");

- case ISD::SETEQ: return getBoolConstant(C1 == C2, dl, VT, OpVT);

- case ISD::SETNE: return getBoolConstant(C1 != C2, dl, VT, OpVT);

- case ISD::SETULT: return getBoolConstant(C1.ult(C2), dl, VT, OpVT);

- case ISD::SETUGT: return getBoolConstant(C1.ugt(C2), dl, VT, OpVT);

- case ISD::SETULE: return getBoolConstant(C1.ule(C2), dl, VT, OpVT);

- case ISD::SETUGE: return getBoolConstant(C1.uge(C2), dl, VT, OpVT);

- case ISD::SETLT: return getBoolConstant(C1.slt(C2), dl, VT, OpVT);

- case ISD::SETGT: return getBoolConstant(C1.sgt(C2), dl, VT, OpVT);

- case ISD::SETLE: return getBoolConstant(C1.sle(C2), dl, VT, OpVT);

- case ISD::SETGE: return getBoolConstant(C1.sge(C2), dl, VT, OpVT);

- }

+ return getBoolConstant(ICmpInst::compare(C1, C2, getICmpCondCode(Cond)),

+ dl, VT, OpVT);

}

@@ -2380,7 +2417,7 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) {

return SDValue();

APInt DemandedElts = VT.isVector()

- ? APInt::getAllOnesValue(VT.getVectorNumElements())

+ ? APInt::getAllOnes(VT.getVectorNumElements())

: APInt(1, 1);

return GetDemandedBits(V, DemandedBits, DemandedElts);

}

@@ -2475,7 +2512,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,

switch (V.getOpcode()) {

case ISD::SPLAT_VECTOR:

UndefElts = V.getOperand(0).isUndef()

- ? APInt::getAllOnesValue(DemandedElts.getBitWidth())

+ ? APInt::getAllOnes(DemandedElts.getBitWidth())

: APInt(DemandedElts.getBitWidth(), 0);

return true;

case ISD::ADD:

@@ -2507,7 +2544,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,

unsigned NumElts = VT.getVectorNumElements();

assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch");

- UndefElts = APInt::getNullValue(NumElts);

+ UndefElts = APInt::getZero(NumElts);

switch (V.getOpcode()) {

case ISD::BUILD_VECTOR: {

@@ -2576,7 +2613,7 @@ bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) {

// For now we don't support this with scalable vectors.

if (!VT.isScalableVector())

- DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());

+ DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());

return isSplatValue(V, DemandedElts, UndefElts) &&

(AllowUndefs || !UndefElts);

}

@@ -2592,7 +2629,7 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {

APInt DemandedElts;

if (!VT.isScalableVector())

- DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());

+ DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());

if (isSplatValue(V, DemandedElts, UndefElts)) {

if (VT.isScalableVector()) {

@@ -2740,7 +2777,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const {

}

APInt DemandedElts = VT.isVector()

- ? APInt::getAllOnesValue(VT.getVectorNumElements())

+ ? APInt::getAllOnes(VT.getVectorNumElements())

: APInt(1, 1);

return computeKnownBits(Op, DemandedElts, Depth);

}

@@ -2878,7 +2915,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,

unsigned NumSubElts = Sub.getValueType().getVectorNumElements();

APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);

APInt DemandedSrcElts = DemandedElts;

- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);

+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);

Known.One.setAllBits();

Known.Zero.setAllBits();

@@ -2965,11 +3002,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,

// bits from the overlapping larger input elements and extracting the

// sub sections we actually care about.

unsigned SubScale = SubBitWidth / BitWidth;

- APInt SubDemandedElts(NumElts / SubScale, 0);

- for (unsigned i = 0; i != NumElts; ++i)

- if (DemandedElts[i])

- SubDemandedElts.setBit(i / SubScale);

+ APInt SubDemandedElts =

+ APIntOps::ScaleBitMask(DemandedElts, NumElts / SubScale);

Known2 = computeKnownBits(N0, SubDemandedElts, Depth + 1);

Known.Zero.setAllBits(); Known.One.setAllBits();

@@ -3415,7 +3449,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,

// If we know the element index, just demand that vector element, else for

// an unknown element index, ignore DemandedElts and demand them all.

- APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);

+ APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);

auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);

if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))

DemandedSrcElts =

@@ -3647,6 +3681,12 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {

}))

return true;

+ // Is the operand of a splat vector a constant power of two?

+ if (Val.getOpcode() == ISD::SPLAT_VECTOR)

+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val->getOperand(0)))

+ if (C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2())

+ return true;

// More could be done here, though the above checks are enough

// to handle some common cases.

@@ -3663,7 +3703,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {

return 1;

APInt DemandedElts = VT.isVector()

- ? APInt::getAllOnesValue(VT.getVectorNumElements())

+ ? APInt::getAllOnes(VT.getVectorNumElements())

: APInt(1, 1);

return ComputeNumSignBits(Op, DemandedElts, Depth);

}

@@ -3771,10 +3811,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,

assert(VT.isVector() && "Expected bitcast to vector");

unsigned Scale = SrcBits / VTBits;

- APInt SrcDemandedElts(NumElts / Scale, 0);

- for (unsigned i = 0; i != NumElts; ++i)

- if (DemandedElts[i])

- SrcDemandedElts.setBit(i / Scale);

+ APInt SrcDemandedElts =

+ APIntOps::ScaleBitMask(DemandedElts, NumElts / Scale);

// Fast case - sign splat can be simply split across the small elements.

Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1);

@@ -3946,13 +3984,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,

// Special case decrementing a value (ADD X, -1):

if (ConstantSDNode *CRHS =

isConstOrConstSplat(Op.getOperand(1), DemandedElts))

- if (CRHS->isAllOnesValue()) {

+ if (CRHS->isAllOnes()) {

KnownBits Known =

computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);

// If the input is known to be 0 or 1, the output is 0/-1, which is all

// sign bits set.

- if ((Known.Zero | 1).isAllOnesValue())

+ if ((Known.Zero | 1).isAllOnes())

return VTBits;

// If we are subtracting one from a positive number, there is no carry

@@ -3971,12 +4009,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,

// Handle NEG.

if (ConstantSDNode *CLHS =

isConstOrConstSplat(Op.getOperand(0), DemandedElts))

- if (CLHS->isNullValue()) {

+ if (CLHS->isZero()) {

KnownBits Known =

computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

// If the input is known to be 0 or 1, the output is 0/-1, which is all

// sign bits set.

- if ((Known.Zero | 1).isAllOnesValue())

+ if ((Known.Zero | 1).isAllOnes())

return VTBits;

// If the input is known to be positive (the sign bit is known clear),

@@ -4080,7 +4118,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,

// If we know the element index, just demand that vector element, else for

// an unknown element index, ignore DemandedElts and demand them all.

- APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);

+ APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);

auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);

if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))

DemandedSrcElts =

@@ -4126,7 +4164,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,

unsigned NumSubElts = Sub.getValueType().getVectorNumElements();

APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);

APInt DemandedSrcElts = DemandedElts;

- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);

+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);

Tmp = std::numeric_limits<unsigned>::max();

if (!!DemandedSubElts) {

@@ -4248,6 +4286,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,

return std::max(FirstAnswer, Mask.countLeadingOnes());

}

+unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op, unsigned Depth) const {

+ unsigned SignBits = ComputeNumSignBits(Op, Depth);

+ return Op.getScalarValueSizeInBits() - SignBits + 1;

+unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op,

+ const APInt &DemandedElts,

+ unsigned Depth) const {

+ unsigned SignBits = ComputeNumSignBits(Op, DemandedElts, Depth);

+ return Op.getScalarValueSizeInBits() - SignBits + 1;

bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly,

unsigned Depth) const {

// Early out for FREEZE.

@@ -4260,7 +4310,7 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly,

return false;

APInt DemandedElts = VT.isVector()

- ? APInt::getAllOnesValue(VT.getVectorNumElements())

+ ? APInt::getAllOnes(VT.getVectorNumElements())

: APInt(1, 1);

return isGuaranteedNotToBeUndefOrPoison(Op, DemandedElts, PoisonOnly, Depth);

}

@@ -4285,7 +4335,17 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,

case ISD::UNDEF:

return PoisonOnly;

- // TODO: ISD::BUILD_VECTOR handling

+ case ISD::BUILD_VECTOR:

+ // NOTE: BUILD_VECTOR has implicit truncation of wider scalar elements -

+ // this shouldn't affect the result.

+ for (unsigned i = 0, e = Op.getNumOperands(); i < e; ++i) {

+ if (!DemandedElts[i])

+ continue;

+ if (!isGuaranteedNotToBeUndefOrPoison(Op.getOperand(i), PoisonOnly,

+ Depth + 1))

+ return false;

+ }

+ return true;

// TODO: Search for noundef attributes from library functions.

@@ -4449,8 +4509,8 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op) const {

"Floating point types unsupported - use isKnownNeverZeroFloat");

// If the value is a constant, we can obviously see if it is a zero or not.

- if (ISD::matchUnaryPredicate(

- Op, [](ConstantSDNode *C) { return !C->isNullValue(); }))

+ if (ISD::matchUnaryPredicate(Op,

+ [](ConstantSDNode *C) { return !C->isZero(); }))

return true;

// TODO: Recognize more cases here.

@@ -4490,7 +4550,7 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {

static SDValue FoldSTEP_VECTOR(const SDLoc &DL, EVT VT, SDValue Step,

SelectionDAG &DAG) {

- if (cast<ConstantSDNode>(Step)->isNullValue())

+ if (cast<ConstantSDNode>(Step)->isZero())

return DAG.getConstant(0, DL, VT);

return SDValue();

@@ -4676,7 +4736,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,

case ISD::UINT_TO_FP:

case ISD::SINT_TO_FP: {

APFloat apf(EVTToAPFloatSemantics(VT),

- APInt::getNullValue(VT.getSizeInBits()));

+ APInt::getZero(VT.getSizeInBits()));

(void)apf.convertFromAPInt(Val,

Opcode==ISD::SINT_TO_FP,

APFloat::rmNearestTiesToEven);

@@ -4828,7 +4888,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,

case ISD::CTTZ_ZERO_UNDEF:

case ISD::CTPOP: {

SDValue Ops = {Operand};

- if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))

+ if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops))

return Fold;

}

@@ -4976,6 +5036,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,

}

if (OpOpcode == ISD::UNDEF)

return getUNDEF(VT);

+ if (OpOpcode == ISD::VSCALE && !NewNodesMustHaveLegalTypes)

+ return getVScale(DL, VT, Operand.getConstantOperandAPInt(0));

break;

case ISD::ANY_EXTEND_VECTOR_INREG:

case ISD::ZERO_EXTEND_VECTOR_INREG:

@@ -5206,173 +5268,111 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,

if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::CONCAT_VECTORS)

return SDValue();

- // For now, the array Ops should only contain two values.

- // This enforcement will be removed once this function is merged with

- // FoldConstantVectorArithmetic

- if (Ops.size() != 2)

+ unsigned NumOps = Ops.size();

+ if (NumOps == 0)

return SDValue();

if (isUndef(Opcode, Ops))

return getUNDEF(VT);

- SDNode *N1 = Ops[0].getNode();

- SDNode *N2 = Ops[1].getNode();

// Handle the case of two scalars.

- if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) {

- if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) {

- if (C1->isOpaque() || C2->isOpaque())

- return SDValue();

- Optional<APInt> FoldAttempt =

- FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());

- if (!FoldAttempt)

- return SDValue();

- SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT);

- assert((!Folded || !VT.isVector()) &&

- "Can't fold vectors ops with scalar operands");

- return Folded;

- }

+ if (NumOps == 2) {

+ // TODO: Move foldConstantFPMath here?

- // fold (add Sym, c) -> Sym+c

- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N1))

- return FoldSymbolOffset(Opcode, VT, GA, N2);

- if (TLI->isCommutativeBinOp(Opcode))

- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2))

- return FoldSymbolOffset(Opcode, VT, GA, N1);

+ if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) {

+ if (auto *C2 = dyn_cast<ConstantSDNode>(Ops[1])) {

+ if (C1->isOpaque() || C2->isOpaque())

+ return SDValue();

- // For fixed width vectors, extract each constant element and fold them

- // individually. Either input may be an undef value.

- bool IsBVOrSV1 = N1->getOpcode() == ISD::BUILD_VECTOR ||

- N1->getOpcode() == ISD::SPLAT_VECTOR;

- if (!IsBVOrSV1 && !N1->isUndef())

- return SDValue();

- bool IsBVOrSV2 = N2->getOpcode() == ISD::BUILD_VECTOR ||

- N2->getOpcode() == ISD::SPLAT_VECTOR;

- if (!IsBVOrSV2 && !N2->isUndef())

- return SDValue();

- // If both operands are undef, that's handled the same way as scalars.

- if (!IsBVOrSV1 && !IsBVOrSV2)

- return SDValue();

+ Optional<APInt> FoldAttempt =

+ FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());

+ if (!FoldAttempt)

+ return SDValue();

- EVT SVT = VT.getScalarType();

- EVT LegalSVT = SVT;

- if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {

- LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);

- if (LegalSVT.bitsLT(SVT))

- return SDValue();

- }

- SmallVector<SDValue, 4> Outputs;

- unsigned NumOps = 0;

- if (IsBVOrSV1)

- NumOps = std::max(NumOps, N1->getNumOperands());

- if (IsBVOrSV2)

- NumOps = std::max(NumOps, N2->getNumOperands());

- assert(NumOps != 0 && "Expected non-zero operands");

- // Scalable vectors should only be SPLAT_VECTOR or UNDEF here. We only need

- // one iteration for that.

- assert((!VT.isScalableVector() || NumOps == 1) &&

- "Scalable vector should only have one scalar");

- for (unsigned I = 0; I != NumOps; ++I) {

- // We can have a fixed length SPLAT_VECTOR and a BUILD_VECTOR so we need

- // to use operand 0 of the SPLAT_VECTOR for each fixed element.

- SDValue V1;

- if (N1->getOpcode() == ISD::BUILD_VECTOR)

- V1 = N1->getOperand(I);

- else if (N1->getOpcode() == ISD::SPLAT_VECTOR)

- V1 = N1->getOperand(0);

- else

- V1 = getUNDEF(SVT);

- SDValue V2;

- if (N2->getOpcode() == ISD::BUILD_VECTOR)

- V2 = N2->getOperand(I);

- else if (N2->getOpcode() == ISD::SPLAT_VECTOR)

- V2 = N2->getOperand(0);

- else

- V2 = getUNDEF(SVT);

- if (SVT.isInteger()) {

- if (V1.getValueType().bitsGT(SVT))

- V1 = getNode(ISD::TRUNCATE, DL, SVT, V1);

- if (V2.getValueType().bitsGT(SVT))

- V2 = getNode(ISD::TRUNCATE, DL, SVT, V2);

+ SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT);

+ assert((!Folded || !VT.isVector()) &&

+ "Can't fold vectors ops with scalar operands");

+ return Folded;

+ }

}

- if (V1.getValueType() != SVT || V2.getValueType() != SVT)

- return SDValue();

- // Fold one vector element.

- SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2);

- if (LegalSVT != SVT)

- ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult);

- // Scalar folding only succeeded if the result is a constant or UNDEF.

- if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&

- ScalarResult.getOpcode() != ISD::ConstantFP)

- return SDValue();

- Outputs.push_back(ScalarResult);

- }

- if (N1->getOpcode() == ISD::BUILD_VECTOR ||

- N2->getOpcode() == ISD::BUILD_VECTOR) {

- assert(VT.getVectorNumElements() == Outputs.size() &&

- "Vector size mismatch!");

- // Build a big vector out of the scalar elements we generated.

- return getBuildVector(VT, SDLoc(), Outputs);

+ // fold (add Sym, c) -> Sym+c

+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[0]))

+ return FoldSymbolOffset(Opcode, VT, GA, Ops[1].getNode());

+ if (TLI->isCommutativeBinOp(Opcode))

+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[1]))

+ return FoldSymbolOffset(Opcode, VT, GA, Ops[0].getNode());

}

- assert((N1->getOpcode() == ISD::SPLAT_VECTOR ||

- N2->getOpcode() == ISD::SPLAT_VECTOR) &&

- "One operand should be a splat vector");

- assert(Outputs.size() == 1 && "Vector size mismatch!");

- return getSplatVector(VT, SDLoc(), Outputs[0]);

-// TODO: Merge with FoldConstantArithmetic

-SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,

- const SDLoc &DL, EVT VT,

- ArrayRef<SDValue> Ops,

- const SDNodeFlags Flags) {

- // If the opcode is a target-specific ISD node, there's nothing we can

- // do here and the operand rules may not line up with the below, so

- // bail early.

- if (Opcode >= ISD::BUILTIN_OP_END)

- return SDValue();

- if (isUndef(Opcode, Ops))

- return getUNDEF(VT);

- // We can only fold vectors - maybe merge with FoldConstantArithmetic someday?

+ // This is for vector folding only from here on.

if (!VT.isVector())

return SDValue();

ElementCount NumElts = VT.getVectorElementCount();

+ // See if we can fold through bitcasted integer ops.

+ // TODO: Can we handle undef elements?

+ if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() &&

+ Ops[0].getValueType() == VT && Ops[1].getValueType() == VT &&

+ Ops[0].getOpcode() == ISD::BITCAST &&

+ Ops[1].getOpcode() == ISD::BITCAST) {

+ SDValue N1 = peekThroughBitcasts(Ops[0]);

+ SDValue N2 = peekThroughBitcasts(Ops[1]);

+ auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);

+ auto *BV2 = dyn_cast<BuildVectorSDNode>(N2);

+ EVT BVVT = N1.getValueType();

+ if (BV1 && BV2 && BVVT.isInteger() && BVVT == N2.getValueType()) {

+ bool IsLE = getDataLayout().isLittleEndian();

+ unsigned EltBits = VT.getScalarSizeInBits();

+ SmallVector<APInt> RawBits1, RawBits2;

+ BitVector UndefElts1, UndefElts2;

+ if (BV1->getConstantRawBits(IsLE, EltBits, RawBits1, UndefElts1) &&

+ BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2) &&

+ UndefElts1.none() && UndefElts2.none()) {

+ SmallVector<APInt> RawBits;

+ for (unsigned I = 0, E = NumElts.getFixedValue(); I != E; ++I) {

+ Optional<APInt> Fold = FoldValue(Opcode, RawBits1[I], RawBits2[I]);

+ if (!Fold)

+ break;

+ RawBits.push_back(Fold.getValue());

+ }

+ if (RawBits.size() == NumElts.getFixedValue()) {

+ // We have constant folded, but we need to cast this again back to

+ // the original (possibly legalized) type.

+ SmallVector<APInt> DstBits;

+ BitVector DstUndefs;

+ BuildVectorSDNode::recastRawBits(IsLE, BVVT.getScalarSizeInBits(),

+ DstBits, RawBits, DstUndefs,

+ BitVector(RawBits.size(), false));

+ EVT BVEltVT = BV1->getOperand(0).getValueType();

+ unsigned BVEltBits = BVEltVT.getSizeInBits();

+ SmallVector<SDValue> Ops(DstBits.size(), getUNDEF(BVEltVT));

+ for (unsigned I = 0, E = DstBits.size(); I != E; ++I) {

+ if (DstUndefs[I])

+ continue;

+ Ops[I] = getConstant(DstBits[I].sextOrSelf(BVEltBits), DL, BVEltVT);

+ }

+ return getBitcast(VT, getBuildVector(BVVT, DL, Ops));

+ }

auto IsScalarOrSameVectorSize = [NumElts](const SDValue &Op) {

return !Op.getValueType().isVector() ||

Op.getValueType().getVectorElementCount() == NumElts;

};

- auto IsConstantBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {

- APInt SplatVal;

- BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op);

+ auto IsBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {

return Op.isUndef() || Op.getOpcode() == ISD::CONDCODE ||

- (BV && BV->isConstant()) ||

- (Op.getOpcode() == ISD::SPLAT_VECTOR &&

- ISD::isConstantSplatVector(Op.getNode(), SplatVal));

+ Op.getOpcode() == ISD::BUILD_VECTOR ||

+ Op.getOpcode() == ISD::SPLAT_VECTOR;

};

// All operands must be vector types with the same number of elements as

- // the result type and must be either UNDEF or a build vector of constant

+ // the result type and must be either UNDEF or a build/splat vector

// or UNDEF scalars.

- if (!llvm::all_of(Ops, IsConstantBuildVectorSplatVectorOrUndef) ||

+ if (!llvm::all_of(Ops, IsBuildVectorSplatVectorOrUndef) ||

!llvm::all_of(Ops, IsScalarOrSameVectorSize))

return SDValue();

@@ -5392,17 +5392,16 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,

// For scalable vector types we know we're dealing with SPLAT_VECTORs. We

// only have one operand to check. For fixed-length vector types we may have

// a combination of BUILD_VECTOR and SPLAT_VECTOR.

- unsigned NumOperands = NumElts.isScalable() ? 1 : NumElts.getFixedValue();

+ unsigned NumVectorElts = NumElts.isScalable() ? 1 : NumElts.getFixedValue();

// Constant fold each scalar lane separately.

SmallVector<SDValue, 4> ScalarResults;

- for (unsigned I = 0; I != NumOperands; I++) {

+ for (unsigned I = 0; I != NumVectorElts; I++) {

SmallVector<SDValue, 4> ScalarOps;

for (SDValue Op : Ops) {

EVT InSVT = Op.getValueType().getScalarType();

if (Op.getOpcode() != ISD::BUILD_VECTOR &&

Op.getOpcode() != ISD::SPLAT_VECTOR) {

- // We've checked that this is UNDEF or a constant of some kind.

if (Op.isUndef())

ScalarOps.push_back(getUNDEF(InSVT));

else

@@ -5423,7 +5422,7 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,

}

// Constant fold the scalar operands.

- SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags);

+ SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps);

// Legalize the (integer) scalar constant if necessary.

if (LegalSVT != SVT)

@@ -5591,9 +5590,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,

N1.getValueType() == VT && "Binary operator types must match!");

// (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's

// worth handling here.

- if (N2C && N2C->isNullValue())

+ if (N2C && N2C->isZero())

return N2;

- if (N2C && N2C->isAllOnesValue()) // X & -1 -> X

+ if (N2C && N2C->isAllOnes()) // X & -1 -> X

return N1;

break;

case ISD::OR:

@@ -5605,7 +5604,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,

N1.getValueType() == VT && "Binary operator types must match!");

// (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so

// it's worth handling here.

- if (N2C && N2C->isNullValue())

+ if (N2C && N2C->isZero())

return N1;

if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.isVector() &&

VT.getVectorElementType() == MVT::i1)

@@ -5711,7 +5710,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,

// size of the value, the shift/rotate count is guaranteed to be zero.

if (VT == MVT::i1)

return N1;

- if (N2C && N2C->isNullValue())

+ if (N2C && N2C->isZero())

return N1;

break;

case ISD::FP_ROUND:

@@ -6086,7 +6085,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,

return V;

// Vector constant folding.

SDValue Ops[] = {N1, N2, N3};

- if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) {

+ if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, Ops)) {

NewSDValueDbgMsg(V, "New node vector constant folding: ", this);

return V;

}

@@ -6099,6 +6098,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,

break;

case ISD::VECTOR_SHUFFLE:

llvm_unreachable("should use getVectorShuffle constructor!");

+ case ISD::VECTOR_SPLICE: {

+ if (cast<ConstantSDNode>(N3)->isNullValue())

+ return N1;

+ break;

+ }

case ISD::INSERT_VECTOR_ELT: {

ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3);

// INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except

@@ -6214,9 +6218,8 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {

ArgChains.push_back(Chain);

// Add a chain value for each stack argument.

- for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(),

- UE = getEntryNode().getNode()->use_end(); U != UE; ++U)

- if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))

+ for (SDNode *U : getEntryNode().getNode()->uses())

+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(U))

if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))

if (FI->getIndex() < 0)

ArgChains.push_back(SDValue(L, 1));

@@ -6720,7 +6723,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,

if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))

DstAlignCanChange = true;

bool IsZeroVal =

- isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();

+ isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero();

if (!TLI.findOptimalMemOpLowering(

MemOps, TLI.getMaxStoresPerMemset(OptSize),

MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol),

@@ -6809,7 +6812,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,

ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);

if (ConstantSize) {

// Memcpy with size zero? Just return the original chain.

- if (ConstantSize->isNullValue())

+ if (ConstantSize->isZero())

return Chain;

SDValue Result = getMemcpyLoadsAndStores(

@@ -6924,7 +6927,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,

ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);

if (ConstantSize) {

// Memmove with size zero? Just return the original chain.

- if (ConstantSize->isNullValue())

+ if (ConstantSize->isZero())

return Chain;

SDValue Result = getMemmoveLoadsAndStores(

@@ -7026,7 +7029,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,

ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);

if (ConstantSize) {

// Memset with size zero? Just return the original chain.

- if (ConstantSize->isNullValue())

+ if (ConstantSize->isZero())

return Chain;

SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src,

@@ -7618,6 +7621,374 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,

return V;

}

+SDValue SelectionDAG::getLoadVP(

+ ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl,

+ SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL,

+ MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment,

+ MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,

+ const MDNode *Ranges, bool IsExpanding) {

+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");

+ MMOFlags |= MachineMemOperand::MOLoad;

+ assert((MMOFlags & MachineMemOperand::MOStore) == 0);

+ // If we don't have a PtrInfo, infer the trivial frame index case to simplify

+ // clients.

+ if (PtrInfo.V.isNull())

+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);

+ uint64_t Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize());

+ MachineFunction &MF = getMachineFunction();

+ MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,

+ Alignment, AAInfo, Ranges);

+ return getLoadVP(AM, ExtType, VT, dl, Chain, Ptr, Offset, Mask, EVL, MemVT,

+ MMO, IsExpanding);

+SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM,

+ ISD::LoadExtType ExtType, EVT VT,

+ const SDLoc &dl, SDValue Chain, SDValue Ptr,

+ SDValue Offset, SDValue Mask, SDValue EVL,

+ EVT MemVT, MachineMemOperand *MMO,

+ bool IsExpanding) {

+ if (VT == MemVT) {

+ ExtType = ISD::NON_EXTLOAD;

+ } else if (ExtType == ISD::NON_EXTLOAD) {

+ assert(VT == MemVT && "Non-extending load from different memory type!");

+ } else {

+ // Extending load.

+ assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&

+ "Should only be an extending load, not truncating!");

+ assert(VT.isInteger() == MemVT.isInteger() &&

+ "Cannot convert from FP to Int or Int -> FP!");

+ assert(VT.isVector() == MemVT.isVector() &&

+ "Cannot use an ext load to convert to or from a vector!");

+ assert((!VT.isVector() ||

+ VT.getVectorElementCount() == MemVT.getVectorElementCount()) &&

+ "Cannot use an ext load to change the number of vector elements!");

+ }

+ bool Indexed = AM != ISD::UNINDEXED;

+ assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");

+ SDVTList VTs = Indexed ? getVTList(VT, Ptr.getValueType(), MVT::Other)

+ : getVTList(VT, MVT::Other);

+ SDValue Ops[] = {Chain, Ptr, Offset, Mask, EVL};

+ FoldingSetNodeID ID;

+ AddNodeIDNode(ID, ISD::VP_LOAD, VTs, Ops);

+ ID.AddInteger(VT.getRawBits());

+ ID.AddInteger(getSyntheticNodeSubclassData<VPLoadSDNode>(

+ dl.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO));

+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());

+ void *IP = nullptr;

+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {

+ cast<VPLoadSDNode>(E)->refineAlignment(MMO);

+ return SDValue(E, 0);

+ }

+ auto *N = newSDNode<VPLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,

+ ExtType, IsExpanding, MemVT, MMO);

+ createOperands(N, Ops);

+ CSEMap.InsertNode(N, IP);

+ InsertNode(N);

+ SDValue V(N, 0);

+ NewSDValueDbgMsg(V, "Creating new node: ", this);

+ return V;

+SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain,

+ SDValue Ptr, SDValue Mask, SDValue EVL,

+ MachinePointerInfo PtrInfo,

+ MaybeAlign Alignment,

+ MachineMemOperand::Flags MMOFlags,

+ const AAMDNodes &AAInfo, const MDNode *Ranges,

+ bool IsExpanding) {

+ SDValue Undef = getUNDEF(Ptr.getValueType());

+ return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,

+ Mask, EVL, PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges,

+ IsExpanding);

+SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain,

+ SDValue Ptr, SDValue Mask, SDValue EVL,

+ MachineMemOperand *MMO, bool IsExpanding) {

+ SDValue Undef = getUNDEF(Ptr.getValueType());

+ return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,

+ Mask, EVL, VT, MMO, IsExpanding);

+SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl,

+ EVT VT, SDValue Chain, SDValue Ptr,

+ SDValue Mask, SDValue EVL,

+ MachinePointerInfo PtrInfo, EVT MemVT,

+ MaybeAlign Alignment,

+ MachineMemOperand::Flags MMOFlags,

+ const AAMDNodes &AAInfo, bool IsExpanding) {

+ SDValue Undef = getUNDEF(Ptr.getValueType());

+ return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask,

+ EVL, PtrInfo, MemVT, Alignment, MMOFlags, AAInfo, nullptr,

+ IsExpanding);

+SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl,

+ EVT VT, SDValue Chain, SDValue Ptr,

+ SDValue Mask, SDValue EVL, EVT MemVT,

+ MachineMemOperand *MMO, bool IsExpanding) {

+ SDValue Undef = getUNDEF(Ptr.getValueType());

+ return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask,

+ EVL, MemVT, MMO, IsExpanding);

+SDValue SelectionDAG::getIndexedLoadVP(SDValue OrigLoad, const SDLoc &dl,

+ SDValue Base, SDValue Offset,

+ ISD::MemIndexedMode AM) {

+ auto *LD = cast<VPLoadSDNode>(OrigLoad);

+ assert(LD->getOffset().isUndef() && "Load is already a indexed load!");

+ // Don't propagate the invariant or dereferenceable flags.

+ auto MMOFlags =

+ LD->getMemOperand()->getFlags() &

+ ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);

+ return getLoadVP(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,

+ LD->getChain(), Base, Offset, LD->getMask(),

+ LD->getVectorLength(), LD->getPointerInfo(),

+ LD->getMemoryVT(), LD->getAlign(), MMOFlags, LD->getAAInfo(),

+ nullptr, LD->isExpandingLoad());

+SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,

+ SDValue Ptr, SDValue Mask, SDValue EVL,

+ MachinePointerInfo PtrInfo, Align Alignment,

+ MachineMemOperand::Flags MMOFlags,

+ const AAMDNodes &AAInfo, bool IsCompressing) {

+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");

+ MMOFlags |= MachineMemOperand::MOStore;

+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);

+ if (PtrInfo.V.isNull())

+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);

+ MachineFunction &MF = getMachineFunction();

+ uint64_t Size =

+ MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize());

+ MachineMemOperand *MMO =

+ MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);

+ return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing);

+SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,

+ SDValue Ptr, SDValue Mask, SDValue EVL,

+ MachineMemOperand *MMO, bool IsCompressing) {

+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");

+ EVT VT = Val.getValueType();

+ SDVTList VTs = getVTList(MVT::Other);

+ SDValue Undef = getUNDEF(Ptr.getValueType());

+ SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL};

+ FoldingSetNodeID ID;

+ AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);

+ ID.AddInteger(VT.getRawBits());

+ ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>(

+ dl.getIROrder(), VTs, ISD::UNINDEXED, false, IsCompressing, VT, MMO));

+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());

+ void *IP = nullptr;

+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {

+ cast<VPStoreSDNode>(E)->refineAlignment(MMO);

+ return SDValue(E, 0);

+ }

+ auto *N =

+ newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,

+ ISD::UNINDEXED, false, IsCompressing, VT, MMO);

+ createOperands(N, Ops);

+ CSEMap.InsertNode(N, IP);

+ InsertNode(N);

+ SDValue V(N, 0);

+ NewSDValueDbgMsg(V, "Creating new node: ", this);

+ return V;

+SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl,

+ SDValue Val, SDValue Ptr, SDValue Mask,

+ SDValue EVL, MachinePointerInfo PtrInfo,

+ EVT SVT, Align Alignment,

+ MachineMemOperand::Flags MMOFlags,

+ const AAMDNodes &AAInfo,

+ bool IsCompressing) {

+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");

+ MMOFlags |= MachineMemOperand::MOStore;

+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);

+ if (PtrInfo.V.isNull())

+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);

+ MachineFunction &MF = getMachineFunction();

+ MachineMemOperand *MMO = MF.getMachineMemOperand(

+ PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()),

+ Alignment, AAInfo);

+ return getTruncStoreVP(Chain, dl, Val, Ptr, Mask, EVL, SVT, MMO,

+ IsCompressing);

+SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl,

+ SDValue Val, SDValue Ptr, SDValue Mask,

+ SDValue EVL, EVT SVT,

+ MachineMemOperand *MMO,

+ bool IsCompressing) {

+ EVT VT = Val.getValueType();

+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");

+ if (VT == SVT)

+ return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing);

+ assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&

+ "Should only be a truncating store, not extending!");

+ assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!");

+ assert(VT.isVector() == SVT.isVector() &&

+ "Cannot use trunc store to convert to or from a vector!");

+ assert((!VT.isVector() ||

+ VT.getVectorElementCount() == SVT.getVectorElementCount()) &&

+ "Cannot use trunc store to change the number of vector elements!");

+ SDVTList VTs = getVTList(MVT::Other);

+ SDValue Undef = getUNDEF(Ptr.getValueType());

+ SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL};

+ FoldingSetNodeID ID;

+ AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);

+ ID.AddInteger(SVT.getRawBits());

+ ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>(

+ dl.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO));

+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());

+ void *IP = nullptr;

+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {

+ cast<VPStoreSDNode>(E)->refineAlignment(MMO);

+ return SDValue(E, 0);

+ }

+ auto *N =

+ newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,

+ ISD::UNINDEXED, true, IsCompressing, SVT, MMO);

+ createOperands(N, Ops);

+ CSEMap.InsertNode(N, IP);

+ InsertNode(N);

+ SDValue V(N, 0);

+ NewSDValueDbgMsg(V, "Creating new node: ", this);

+ return V;

+SDValue SelectionDAG::getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl,

+ SDValue Base, SDValue Offset,

+ ISD::MemIndexedMode AM) {

+ auto *ST = cast<VPStoreSDNode>(OrigStore);

+ assert(ST->getOffset().isUndef() && "Store is already an indexed store!");

+ SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);

+ SDValue Ops[] = {ST->getChain(), ST->getValue(), Base,

+ Offset, ST->getMask(), ST->getVectorLength()};

+ FoldingSetNodeID ID;

+ AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);

+ ID.AddInteger(ST->getMemoryVT().getRawBits());

+ ID.AddInteger(ST->getRawSubclassData());

+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());

+ void *IP = nullptr;

+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))

+ return SDValue(E, 0);

+ auto *N = newSDNode<VPStoreSDNode>(

+ dl.getIROrder(), dl.getDebugLoc(), VTs, AM, ST->isTruncatingStore(),

+ ST->isCompressingStore(), ST->getMemoryVT(), ST->getMemOperand());

+ createOperands(N, Ops);

+ CSEMap.InsertNode(N, IP);

+ InsertNode(N);

+ SDValue V(N, 0);

+ NewSDValueDbgMsg(V, "Creating new node: ", this);

+ return V;

+SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl,

+ ArrayRef<SDValue> Ops, MachineMemOperand *MMO,

+ ISD::MemIndexType IndexType) {

+ assert(Ops.size() == 6 && "Incompatible number of operands");

+ FoldingSetNodeID ID;

+ AddNodeIDNode(ID, ISD::VP_GATHER, VTs, Ops);

+ ID.AddInteger(VT.getRawBits());

+ ID.AddInteger(getSyntheticNodeSubclassData<VPGatherSDNode>(

+ dl.getIROrder(), VTs, VT, MMO, IndexType));

+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());

+ void *IP = nullptr;

+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {

+ cast<VPGatherSDNode>(E)->refineAlignment(MMO);

+ return SDValue(E, 0);

+ }

+ auto *N = newSDNode<VPGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,

+ VT, MMO, IndexType);

+ createOperands(N, Ops);

+ assert(N->getMask().getValueType().getVectorElementCount() ==

+ N->getValueType(0).getVectorElementCount() &&

+ "Vector width mismatch between mask and data");

+ assert(N->getIndex().getValueType().getVectorElementCount().isScalable() ==

+ N->getValueType(0).getVectorElementCount().isScalable() &&

+ "Scalable flags of index and data do not match");

+ assert(ElementCount::isKnownGE(

+ N->getIndex().getValueType().getVectorElementCount(),

+ N->getValueType(0).getVectorElementCount()) &&

+ "Vector width mismatch between index and data");

+ assert(isa<ConstantSDNode>(N->getScale()) &&

+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&

+ "Scale should be a constant power of 2");

+ CSEMap.InsertNode(N, IP);

+ InsertNode(N);

+ SDValue V(N, 0);

+ NewSDValueDbgMsg(V, "Creating new node: ", this);

+ return V;

+SDValue SelectionDAG::getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl,

+ ArrayRef<SDValue> Ops,

+ MachineMemOperand *MMO,

+ ISD::MemIndexType IndexType) {

+ assert(Ops.size() == 7 && "Incompatible number of operands");

+ FoldingSetNodeID ID;

+ AddNodeIDNode(ID, ISD::VP_SCATTER, VTs, Ops);

+ ID.AddInteger(VT.getRawBits());

+ ID.AddInteger(getSyntheticNodeSubclassData<VPScatterSDNode>(

+ dl.getIROrder(), VTs, VT, MMO, IndexType));

+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());

+ void *IP = nullptr;

+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {

+ cast<VPScatterSDNode>(E)->refineAlignment(MMO);

+ return SDValue(E, 0);

+ }

+ auto *N = newSDNode<VPScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,

+ VT, MMO, IndexType);

+ createOperands(N, Ops);

+ assert(N->getMask().getValueType().getVectorElementCount() ==

+ N->getValue().getValueType().getVectorElementCount() &&

+ "Vector width mismatch between mask and data");

+ assert(

+ N->getIndex().getValueType().getVectorElementCount().isScalable() ==

+ N->getValue().getValueType().getVectorElementCount().isScalable() &&

+ "Scalable flags of index and data do not match");

+ assert(ElementCount::isKnownGE(

+ N->getIndex().getValueType().getVectorElementCount(),

+ N->getValue().getValueType().getVectorElementCount()) &&

+ "Vector width mismatch between index and data");

+ assert(isa<ConstantSDNode>(N->getScale()) &&

+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&

+ "Scale should be a constant power of 2");

+ CSEMap.InsertNode(N, IP);

+ InsertNode(N);

+ SDValue V(N, 0);

+ NewSDValueDbgMsg(V, "Creating new node: ", this);

+ return V;

SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,

SDValue Base, SDValue Offset, SDValue Mask,

SDValue PassThru, EVT MemVT,

@@ -7818,7 +8189,7 @@ SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) {

// select true, T, F --> T

// select false, T, F --> F

if (auto *CondC = dyn_cast<ConstantSDNode>(Cond))

- return CondC->isNullValue() ? F : T;

+ return CondC->isZero() ? F : T;

// TODO: This should simplify VSELECT with constant condition using something

// like this (but check boolean contents to be complete?):

@@ -9296,7 +9667,7 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) {

}

#ifndef NDEBUG

-void SelectionDAG::VerifyDAGDiverence() {

+void SelectionDAG::VerifyDAGDivergence() {

std::vector<SDNode *> TopoOrder;

CreateTopologicalOrder(TopoOrder);

for (auto *N : TopoOrder) {

@@ -9384,21 +9755,20 @@ unsigned SelectionDAG::AssignTopologicalOrder() {

// before SortedPos will contain the topological sort index, and the

// Node Id fields for nodes At SortedPos and after will contain the

// count of outstanding operands.

- for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {

- SDNode *N = &*I++;

- checkForCycles(N, this);

- unsigned Degree = N->getNumOperands();

+ for (SDNode &N : llvm::make_early_inc_range(allnodes())) {

+ checkForCycles(&N, this);

+ unsigned Degree = N.getNumOperands();

if (Degree == 0) {

// A node with no operands, add it to the result array immediately.

- N->setNodeId(DAGSize++);

- allnodes_iterator Q(N);

+ N.setNodeId(DAGSize++);

+ allnodes_iterator Q(&N);

if (Q != SortedPos)

SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));

assert(SortedPos != AllNodes.end() && "Overran node list");

++SortedPos;

} else {

// Temporarily use the Node Id as scratch space for the degree count.

- N->setNodeId(Degree);

+ N.setNodeId(Degree);

}

@@ -9512,12 +9882,9 @@ SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,

std::string ErrorStr;

raw_string_ostream ErrorFormatter(ErrorStr);

ErrorFormatter << "Undefined external symbol ";

ErrorFormatter << '"' << Symbol << '"';

- ErrorFormatter.flush();

- report_fatal_error(ErrorStr);

+ report_fatal_error(Twine(ErrorFormatter.str()));

}

//===----------------------------------------------------------------------===//

@@ -9526,7 +9893,7 @@ SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,

bool llvm::isNullConstant(SDValue V) {

ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);

- return Const != nullptr && Const->isNullValue();

+ return Const != nullptr && Const->isZero();

}

bool llvm::isNullFPConstant(SDValue V) {

@@ -9536,7 +9903,7 @@ bool llvm::isNullFPConstant(SDValue V) {

bool llvm::isAllOnesConstant(SDValue V) {

ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);

- return Const != nullptr && Const->isAllOnesValue();

+ return Const != nullptr && Const->isAllOnes();

}

bool llvm::isOneConstant(SDValue V) {

@@ -9670,7 +10037,7 @@ bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) {

// TODO: may want to use peekThroughBitcast() here.

ConstantSDNode *C =

isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation=*/true);

- return C && C->isNullValue();

+ return C && C->isZero();

}

bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) {

@@ -9684,7 +10051,7 @@ bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) {

N = peekThroughBitcasts(N);

unsigned BitWidth = N.getScalarValueSizeInBits();

ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs);

- return C && C->isAllOnesValue() && C->getValueSizeInBits(0) == BitWidth;

+ return C && C->isAllOnes() && C->getValueSizeInBits(0) == BitWidth;

}

HandleSDNode::~HandleSDNode() {

@@ -9790,8 +10157,7 @@ bool SDNode::hasAnyUseOfValue(unsigned Value) const {

/// isOnlyUserOf - Return true if this node is the only use of N.

bool SDNode::isOnlyUserOf(const SDNode *N) const {

bool Seen = false;

- for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {

- SDNode *User = *I;

+ for (const SDNode *User : N->uses()) {

if (User == this)

Seen = true;

else

@@ -9804,8 +10170,7 @@ bool SDNode::isOnlyUserOf(const SDNode *N) const {

/// Return true if the only users of N are contained in Nodes.

bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {

bool Seen = false;

- for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {

- SDNode *User = *I;

+ for (const SDNode *User : N->uses()) {

if (llvm::is_contained(Nodes, User))

Seen = true;

else

@@ -10212,14 +10577,14 @@ SelectionDAG::GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT,

"Mixing fixed width and scalable vectors when enveloping a type");

EVT LoVT, HiVT;

if (VTNumElts.getKnownMinValue() > EnvNumElts.getKnownMinValue()) {

- LoVT = EnvVT;

+ LoVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts);

HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts);

*HiIsEmpty = false;

} else {

// Flag that hi type has zero storage size, but return split envelop type

// (this would be easier if vector types with zero elements were allowed).

LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts);

- HiVT = EnvVT;

+ HiVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts);

*HiIsEmpty = true;

}

return std::make_pair(LoVT, HiVT);

@@ -10387,7 +10752,7 @@ SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts,

}

SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {

- APInt DemandedElts = APInt::getAllOnesValue(getNumOperands());

+ APInt DemandedElts = APInt::getAllOnes(getNumOperands());

return getSplatValue(DemandedElts, UndefElements);

}

@@ -10439,7 +10804,7 @@ bool BuildVectorSDNode::getRepeatedSequence(const APInt &DemandedElts,

bool BuildVectorSDNode::getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,

BitVector *UndefElements) const {

- APInt DemandedElts = APInt::getAllOnesValue(getNumOperands());

+ APInt DemandedElts = APInt::getAllOnes(getNumOperands());

return getRepeatedSequence(DemandedElts, Sequence, UndefElements);

}

@@ -10485,6 +10850,97 @@ BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,

return -1;

}

+bool BuildVectorSDNode::getConstantRawBits(

+ bool IsLittleEndian, unsigned DstEltSizeInBits,

+ SmallVectorImpl<APInt> &RawBitElements, BitVector &UndefElements) const {

+ // Early-out if this contains anything but Undef/Constant/ConstantFP.

+ if (!isConstant())

+ return false;

+ unsigned NumSrcOps = getNumOperands();

+ unsigned SrcEltSizeInBits = getValueType(0).getScalarSizeInBits();

+ assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 &&

+ "Invalid bitcast scale");

+ // Extract raw src bits.

+ SmallVector<APInt> SrcBitElements(NumSrcOps,

+ APInt::getNullValue(SrcEltSizeInBits));

+ BitVector SrcUndeElements(NumSrcOps, false);

+ for (unsigned I = 0; I != NumSrcOps; ++I) {

+ SDValue Op = getOperand(I);

+ if (Op.isUndef()) {

+ SrcUndeElements.set(I);

+ continue;

+ }

+ auto *CInt = dyn_cast<ConstantSDNode>(Op);

+ auto *CFP = dyn_cast<ConstantFPSDNode>(Op);

+ assert((CInt || CFP) && "Unknown constant");

+ SrcBitElements[I] =

+ CInt ? CInt->getAPIntValue().truncOrSelf(SrcEltSizeInBits)

+ : CFP->getValueAPF().bitcastToAPInt();

+ }

+ // Recast to dst width.

+ recastRawBits(IsLittleEndian, DstEltSizeInBits, RawBitElements,

+ SrcBitElements, UndefElements, SrcUndeElements);

+ return true;

+void BuildVectorSDNode::recastRawBits(bool IsLittleEndian,

+ unsigned DstEltSizeInBits,

+ SmallVectorImpl<APInt> &DstBitElements,

+ ArrayRef<APInt> SrcBitElements,

+ BitVector &DstUndefElements,

+ const BitVector &SrcUndefElements) {

+ unsigned NumSrcOps = SrcBitElements.size();

+ unsigned SrcEltSizeInBits = SrcBitElements[0].getBitWidth();

+ assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 &&

+ "Invalid bitcast scale");

+ assert(NumSrcOps == SrcUndefElements.size() &&

+ "Vector size mismatch");

+ unsigned NumDstOps = (NumSrcOps * SrcEltSizeInBits) / DstEltSizeInBits;

+ DstUndefElements.clear();

+ DstUndefElements.resize(NumDstOps, false);

+ DstBitElements.assign(NumDstOps, APInt::getNullValue(DstEltSizeInBits));

+ // Concatenate src elements constant bits together into dst element.

+ if (SrcEltSizeInBits <= DstEltSizeInBits) {

+ unsigned Scale = DstEltSizeInBits / SrcEltSizeInBits;

+ for (unsigned I = 0; I != NumDstOps; ++I) {

+ DstUndefElements.set(I);

+ APInt &DstBits = DstBitElements[I];

+ for (unsigned J = 0; J != Scale; ++J) {

+ unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1));

+ if (SrcUndefElements[Idx])

+ continue;

+ DstUndefElements.reset(I);

+ const APInt &SrcBits = SrcBitElements[Idx];

+ assert(SrcBits.getBitWidth() == SrcEltSizeInBits &&

+ "Illegal constant bitwidths");

+ DstBits.insertBits(SrcBits, J * SrcEltSizeInBits);

+ }

+ return;

+ }

+ // Split src element constant bits into dst elements.

+ unsigned Scale = SrcEltSizeInBits / DstEltSizeInBits;

+ for (unsigned I = 0; I != NumSrcOps; ++I) {

+ if (SrcUndefElements[I]) {

+ DstUndefElements.set(I * Scale, (I + 1) * Scale);

+ continue;

+ }

+ const APInt &SrcBits = SrcBitElements[I];

+ for (unsigned J = 0; J != Scale; ++J) {

+ unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1));

+ APInt &DstBits = DstBitElements[Idx];

+ DstBits = SrcBits.extractBits(DstEltSizeInBits, J * DstEltSizeInBits);

+ }

bool BuildVectorSDNode::isConstant() const {

for (const SDValue &Op : op_values()) {

unsigned Opc = Op.getOpcode();

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 20c7d771bfb6..6d8252046501 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp

@@ -14,6 +14,7 @@

#include "llvm/CodeGen/SelectionDAG.h"

#include "llvm/CodeGen/SelectionDAGNodes.h"

#include "llvm/CodeGen/TargetLowering.h"

+#include "llvm/IR/GlobalAlias.h"

#include "llvm/Support/Casting.h"

#include "llvm/Support/Debug.h"

#include <cstdint>

@@ -143,13 +144,27 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0,

bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());

bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());

- // If of mismatched base types or checkable indices we can check

- // they do not alias.

- if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||

- (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&

- (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) {

- IsAlias = false;

- return true;

+ if ((IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) {

+ // We can derive NoAlias in case of mismatched base types.

+ if (IsFI0 != IsFI1 || IsGV0 != IsGV1 || IsCV0 != IsCV1) {

+ IsAlias = false;

+ return true;

+ }

+ if (IsGV0 && IsGV1) {

+ auto *GV0 = cast<GlobalAddressSDNode>(BasePtr0.getBase())->getGlobal();

+ auto *GV1 = cast<GlobalAddressSDNode>(BasePtr1.getBase())->getGlobal();

+ // It doesn't make sense to access one global value using another global

+ // value's address, so we can assume that there is no aliasing in case of

+ // two different globals (unless we have symbols that may indirectly point

+ // to each other).

+ // FIXME: This is perhaps a bit too defensive. We could try to follow the

+ // chain with aliasee information for GlobalAlias variables to find out if

+ // indirect symbols may alias or not.

+ if (GV0 != GV1 && !isa<GlobalAlias>(GV0) && !isa<GlobalAlias>(GV1)) {

+ IsAlias = false;

+ return true;

+ }

}

return false; // Cannot determine whether the pointers alias.

}

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index d56d4bcc9169..5d911c165293 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

@@ -69,6 +69,7 @@

#include "llvm/IR/DataLayout.h"

#include "llvm/IR/DebugInfoMetadata.h"

#include "llvm/IR/DerivedTypes.h"

+#include "llvm/IR/DiagnosticInfo.h"

#include "llvm/IR/Function.h"

#include "llvm/IR/GetElementPtrTypeIterator.h"

#include "llvm/IR/InlineAsm.h"

@@ -399,29 +400,31 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,

return Val;

if (PartEVT.isVector()) {

+ // Vector/Vector bitcast.

+ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())

+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

// If the element type of the source/dest vectors are the same, but the

// parts vector has more elements than the value vector, then we have a

// vector widening case (e.g. <2 x float> -> <4 x float>). Extract the

// elements we want.

- if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {

+ if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) {

assert((PartEVT.getVectorElementCount().getKnownMinValue() >

ValueVT.getVectorElementCount().getKnownMinValue()) &&

(PartEVT.getVectorElementCount().isScalable() ==

ValueVT.getVectorElementCount().isScalable()) &&

"Cannot narrow, it would be a lossy transformation");

- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,

- DAG.getVectorIdxConstant(0, DL));

+ PartEVT =

+ EVT::getVectorVT(*DAG.getContext(), PartEVT.getVectorElementType(),

+ ValueVT.getVectorElementCount());

+ Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, PartEVT, Val,

+ DAG.getVectorIdxConstant(0, DL));

+ if (PartEVT == ValueVT)

+ return Val;

}

- // Vector/Vector bitcast.

- if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())

- return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

- assert(PartEVT.getVectorElementCount() == ValueVT.getVectorElementCount() &&

- "Cannot handle this kind of promotion");

// Promoted vector extract

return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);

}

// Trivial bitcast if the types are the same size and the destination

@@ -670,6 +673,17 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,

// Promoted vector extract

Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);

+ } else if (PartEVT.isVector() &&

+ PartEVT.getVectorElementType() !=

+ ValueVT.getVectorElementType() &&

+ TLI.getTypeAction(*DAG.getContext(), ValueVT) ==

+ TargetLowering::TypeWidenVector) {

+ // Combination of widening and promotion.

+ EVT WidenVT =

+ EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(),

+ PartVT.getVectorElementCount());

+ SDValue Widened = widenVectorToPartType(DAG, Val, DL, WidenVT);

+ Val = DAG.getAnyExtOrTrunc(Widened, DL, PartVT);

} else {

if (ValueVT.getVectorElementCount().isScalar()) {

Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,

@@ -726,15 +740,19 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,

} else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) {

// Bitconvert vector->vector case.

Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);

- } else if (SDValue Widened =

- widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) {

- Val = Widened;

- } else if (BuiltVectorTy.getVectorElementType().bitsGE(

- ValueVT.getVectorElementType()) &&

- BuiltVectorTy.getVectorElementCount() ==

- ValueVT.getVectorElementCount()) {

- // Promoted vector extract

- Val = DAG.getAnyExtOrTrunc(Val, DL, BuiltVectorTy);

+ } else {

+ if (BuiltVectorTy.getVectorElementType().bitsGT(

+ ValueVT.getVectorElementType())) {

+ // Integer promotion.

+ ValueVT = EVT::getVectorVT(*DAG.getContext(),

+ BuiltVectorTy.getVectorElementType(),

+ ValueVT.getVectorElementCount());

+ Val = DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);

+ }

+ if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) {

+ Val = Widened;

+ }

}

assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type");

@@ -1275,21 +1293,23 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {

while (isa<Instruction>(V)) {

Instruction &VAsInst = *cast<Instruction>(V);

// Temporary "0", awaiting real implementation.

+ SmallVector<uint64_t, 16> Ops;

SmallVector<Value *, 4> AdditionalValues;

- DIExpression *SalvagedExpr =

- salvageDebugInfoImpl(VAsInst, Expr, StackValue, 0, AdditionalValues);

+ V = salvageDebugInfoImpl(VAsInst, Expr->getNumLocationOperands(), Ops,

+ AdditionalValues);

// If we cannot salvage any further, and haven't yet found a suitable debug

// expression, bail out.

+ if (!V)

+ break;

// TODO: If AdditionalValues isn't empty, then the salvage can only be

// represented with a DBG_VALUE_LIST, so we give up. When we have support

// here for variadic dbg_values, remove that condition.

- if (!SalvagedExpr || !AdditionalValues.empty())

+ if (!AdditionalValues.empty())

break;

// New value and expr now represent this debuginfo.

- V = VAsInst.getOperand(0);

- Expr = SalvagedExpr;

+ Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, StackValue);

// Some kind of simplification occurred: check whether the operand of the

// salvaged debug expression can be encoded in this DAG.

@@ -1400,7 +1420,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,

BitsToDescribe = *VarSize;

if (auto Fragment = Expr->getFragmentInfo())

BitsToDescribe = Fragment->SizeInBits;

- for (auto RegAndSize : RFV.getRegsAndSizes()) {

+ for (const auto &RegAndSize : RFV.getRegsAndSizes()) {

// Bail out if all bits are described already.

if (Offset >= BitsToDescribe)

break;

@@ -1945,16 +1965,13 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {

/*IsVarArg*/ false, DL);

ISD::NodeType ExtendKind = ISD::ANY_EXTEND;

- if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,

- Attribute::SExt))

+ if (F->getAttributes().hasRetAttr(Attribute::SExt))

ExtendKind = ISD::SIGN_EXTEND;

- else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,

- Attribute::ZExt))

+ else if (F->getAttributes().hasRetAttr(Attribute::ZExt))

ExtendKind = ISD::ZERO_EXTEND;

LLVMContext &Context = F->getContext();

- bool RetInReg = F->getAttributes().hasAttribute(

- AttributeList::ReturnIndex, Attribute::InReg);

+ bool RetInReg = F->getAttributes().hasRetAttr(Attribute::InReg);

for (unsigned j = 0; j != NumValues; ++j) {

EVT VT = ValueVTs[j];

@@ -1995,7 +2012,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {

Flags.setZExt();

for (unsigned i = 0; i < NumParts; ++i) {

- Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),

+ Outs.push_back(ISD::OutputArg(Flags,

+ Parts[i].getValueType().getSimpleVT(),

VT, /*isfixed=*/true, 0, 0));

OutVals.push_back(Parts[i]);

}

@@ -2012,10 +2030,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {

assert(SwiftError.getFunctionArg() && "Need a swift error argument");

ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();

Flags.setSwiftError();

- Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,

- EVT(TLI.getPointerTy(DL)) /*argvt*/,

- true /*isfixed*/, 1 /*origidx*/,

- 0 /*partOffs*/));

+ Outs.push_back(ISD::OutputArg(

+ Flags, /*vt=*/TLI.getPointerTy(DL), /*argvt=*/EVT(TLI.getPointerTy(DL)),

+ /*isfixed=*/true, /*origidx=*/1, /*partOffs=*/0));

// Create SDNode for the swifterror virtual register.

OutVals.push_back(

DAG.getRegister(SwiftError.getOrCreateVRegUseAt(

@@ -2566,7 +2583,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,

JumpTableReg, SwitchOp);

JT.Reg = JumpTableReg;

- if (!JTH.OmitRangeCheck) {

+ if (!JTH.FallthroughUnreachable) {

// Emit the range check for the jump table, and branch to the default block

// for the switch statement if the value being switched on exceeds the

// largest case in the switch.

@@ -2663,7 +2680,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,

TargetLowering::ArgListEntry Entry;

Entry.Node = GuardVal;

Entry.Ty = FnTy->getParamType(0);

- if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg))

+ if (GuardCheckFn->hasParamAttribute(0, Attribute::AttrKind::InReg))

Entry.IsInReg = true;

Args.push_back(Entry);

@@ -2778,13 +2795,13 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,

MachineBasicBlock* MBB = B.Cases[0].ThisBB;

- if (!B.OmitRangeCheck)

+ if (!B.FallthroughUnreachable)

addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);

addSuccessorWithProb(SwitchBB, MBB, B.Prob);

SwitchBB->normalizeSuccProbs();

SDValue Root = CopyTo;

- if (!B.OmitRangeCheck) {

+ if (!B.FallthroughUnreachable) {

// Conditional branch to the default block.

SDValue RangeCmp = DAG.getSetCC(dl,

TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),

@@ -3140,7 +3157,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {

// count type has enough bits to represent any shift value, truncate

// it now. This is a common case and it exposes the truncate to

// optimization early.

- else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits()))

+ else if (ShiftSize >= Log2_32_Ceil(Op1.getValueSizeInBits()))

Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);

// Otherwise we'll need to temporarily settle for some other convenient

// type. Type legalization will make adjustments once the shiftee is split.

@@ -4057,8 +4074,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {

Type *Ty = I.getType();

Align Alignment = I.getAlign();

- AAMDNodes AAInfo;

- I.getAAMetadata(AAInfo);

+ AAMDNodes AAInfo = I.getAAMetadata();

const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);

SmallVector<EVT, 4> ValueVTs, MemVTs;

@@ -4185,13 +4201,11 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {

const Value *SV = I.getOperand(0);

Type *Ty = I.getType();

- AAMDNodes AAInfo;

- I.getAAMetadata(AAInfo);

assert(

(!AA ||

!AA->pointsToConstantMemory(MemoryLocation(

SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),

- AAInfo))) &&

+ I.getAAMetadata()))) &&

"load_from_swift_error should not be constant memory");

SmallVector<EVT, 4> ValueVTs;

@@ -4249,8 +4263,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {

SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));

SDLoc dl = getCurSDLoc();

Align Alignment = I.getAlign();

- AAMDNodes AAInfo;

- I.getAAMetadata(AAInfo);

+ AAMDNodes AAInfo = I.getAAMetadata();

auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());

@@ -4321,14 +4334,11 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,

if (!Alignment)

Alignment = DAG.getEVTAlign(VT);

- AAMDNodes AAInfo;

- I.getAAMetadata(AAInfo);

MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(

MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,

// TODO: Make MachineMemOperands aware of scalable

// vectors.

- VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo);

+ VT.getStoreSize().getKnownMinSize(), *Alignment, I.getAAMetadata());

SDValue StoreNode =

DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,

ISD::UNINDEXED, false /* Truncating */, IsCompressing);

@@ -4358,7 +4368,7 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,

const TargetLowering &TLI = DAG.getTargetLoweringInfo();

const DataLayout &DL = DAG.getDataLayout();

- assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type");

+ assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");

// Handle splat constant pointer.

if (auto *C = dyn_cast<Constant>(Ptr)) {

@@ -4412,9 +4422,6 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {

.getValueOr(DAG.getEVTAlign(VT.getScalarType()));

const TargetLowering &TLI = DAG.getTargetLoweringInfo();

- AAMDNodes AAInfo;

- I.getAAMetadata(AAInfo);

SDValue Base;

SDValue Index;

ISD::MemIndexType IndexType;

@@ -4427,7 +4434,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {

MachinePointerInfo(AS), MachineMemOperand::MOStore,

// TODO: Make MachineMemOperands aware of scalable

// vectors.

- MemoryLocation::UnknownSize, Alignment, AAInfo);

+ MemoryLocation::UnknownSize, Alignment, I.getAAMetadata());

if (!UniformBase) {

Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));

Index = getValue(Ptr);

@@ -4485,8 +4492,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {

if (!Alignment)

Alignment = DAG.getEVTAlign(VT);

- AAMDNodes AAInfo;

- I.getAAMetadata(AAInfo);

+ AAMDNodes AAInfo = I.getAAMetadata();

const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);

// Do not serialize masked loads of constant memory with anything.

@@ -4529,8 +4535,6 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {

->getMaybeAlignValue()

.getValueOr(DAG.getEVTAlign(VT.getScalarType()));

- AAMDNodes AAInfo;

- I.getAAMetadata(AAInfo);

const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);

SDValue Root = DAG.getRoot();

@@ -4545,7 +4549,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {

MachinePointerInfo(AS), MachineMemOperand::MOLoad,

// TODO: Make MachineMemOperands aware of scalable

// vectors.

- MemoryLocation::UnknownSize, Alignment, AAInfo, Ranges);

+ MemoryLocation::UnknownSize, Alignment, I.getAAMetadata(), Ranges);

if (!UniformBase) {

Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));

@@ -4786,7 +4790,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,

TLI.getPointerTy(DAG.getDataLayout())));

// Add all operands of the call to the operand list.

- for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {

+ for (unsigned i = 0, e = I.arg_size(); i != e; ++i) {

const Value *Arg = I.getArgOperand(i);

if (!I.paramHasAttr(i, Attribute::ImmArg)) {

Ops.push_back(getValue(Arg));

@@ -4823,12 +4827,11 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,

SDValue Result;

if (IsTgtIntrinsic) {

// This is target intrinsic that touches memory

- AAMDNodes AAInfo;

- I.getAAMetadata(AAInfo);

Result =

DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,

MachinePointerInfo(Info.ptrVal, Info.offset),

- Info.align, Info.flags, Info.size, AAInfo);

+ Info.align, Info.flags, Info.size,

+ I.getAAMetadata());

} else if (!HasChain) {

Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);

} else if (!I.getType()->isVoidTy()) {

@@ -5510,12 +5513,12 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(

// we've been asked to pursue.

auto MakeVRegDbgValue = [&](Register Reg, DIExpression *FragExpr,

bool Indirect) {

- if (Reg.isVirtual() && TM.Options.ValueTrackingVariableLocations) {

+ if (Reg.isVirtual() && MF.useDebugInstrRef()) {

// For VRegs, in instruction referencing mode, create a DBG_INSTR_REF

// pointing at the VReg, which will be patched up later.

auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF);

auto MIB = BuildMI(MF, DL, Inst);

- MIB.addReg(Reg, RegState::Debug);

+ MIB.addReg(Reg);

MIB.addImm(0);

MIB.addMetadata(Variable);

auto *NewDIExpr = FragExpr;

@@ -5637,7 +5640,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(

auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>>

SplitRegs) {

unsigned Offset = 0;

- for (auto RegAndSize : SplitRegs) {

+ for (const auto &RegAndSize : SplitRegs) {

// If the expression is already a fragment, the current register

// offset+size might extend beyond the fragment. In this case, only

// the register bits that are inside the fragment are relevant.

@@ -5866,12 +5869,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,

// FIXME: Support passing different dest/src alignments to the memcpy DAG

// node.

SDValue Root = isVol ? getRoot() : getMemoryRoot();

- AAMDNodes AAInfo;

- I.getAAMetadata(AAInfo);

SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol,

/* AlwaysInline */ false, isTC,

MachinePointerInfo(I.getArgOperand(0)),

- MachinePointerInfo(I.getArgOperand(1)), AAInfo);

+ MachinePointerInfo(I.getArgOperand(1)),

+ I.getAAMetadata());

updateDAGForMaybeTailCall(MC);

return;

}

@@ -5889,12 +5891,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,

bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());

// FIXME: Support passing different dest/src alignments to the memcpy DAG

// node.

- AAMDNodes AAInfo;

- I.getAAMetadata(AAInfo);

SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol,

/* AlwaysInline */ true, isTC,

MachinePointerInfo(I.getArgOperand(0)),

- MachinePointerInfo(I.getArgOperand(1)), AAInfo);

+ MachinePointerInfo(I.getArgOperand(1)),

+ I.getAAMetadata());

updateDAGForMaybeTailCall(MC);

return;

}

@@ -5908,10 +5909,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,

bool isVol = MSI.isVolatile();

bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());

SDValue Root = isVol ? getRoot() : getMemoryRoot();

- AAMDNodes AAInfo;

- I.getAAMetadata(AAInfo);

SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC,

- MachinePointerInfo(I.getArgOperand(0)), AAInfo);

+ MachinePointerInfo(I.getArgOperand(0)),

+ I.getAAMetadata());

updateDAGForMaybeTailCall(MS);

return;

}

@@ -5929,11 +5929,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,

// FIXME: Support passing different dest/src alignments to the memmove DAG

// node.

SDValue Root = isVol ? getRoot() : getMemoryRoot();

- AAMDNodes AAInfo;

- I.getAAMetadata(AAInfo);

SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol,

isTC, MachinePointerInfo(I.getArgOperand(0)),

- MachinePointerInfo(I.getArgOperand(1)), AAInfo);

+ MachinePointerInfo(I.getArgOperand(1)),

+ I.getAAMetadata());

updateDAGForMaybeTailCall(MM);

return;

}

@@ -6124,7 +6123,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,

if (Values.empty())

return;

- if (std::count(Values.begin(), Values.end(), nullptr))

+ if (llvm::is_contained(Values, nullptr))

return;

bool IsVariadic = DI.hasArgList();

@@ -6706,9 +6705,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,

case Intrinsic::debugtrap:

case Intrinsic::trap: {

StringRef TrapFuncName =

- I.getAttributes()

- .getAttribute(AttributeList::FunctionIndex, "trap-func-name")

- .getValueAsString();

+ I.getAttributes().getFnAttr("trap-func-name").getValueAsString();

if (TrapFuncName.empty()) {

switch (Intrinsic) {

case Intrinsic::trap:

@@ -6888,7 +6885,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,

// Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission

// is the same on all targets.

- for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) {

+ for (unsigned Idx = 0, E = I.arg_size(); Idx < E; ++Idx) {

Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();

if (isa<ConstantPointerNull>(Arg))

continue; // Skip null pointers. They represent a hole in index space.

@@ -7058,7 +7055,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,

};

SmallVector<BranchFunnelTarget, 8> Targets;

- for (unsigned Op = 1, N = I.getNumArgOperands(); Op != N; Op += 2) {

+ for (unsigned Op = 1, N = I.arg_size(); Op != N; Op += 2) {

auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(

I.getArgOperand(Op), Offset, DAG.getDataLayout()));

if (ElemBase != Base)

@@ -7327,9 +7324,128 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {

llvm_unreachable(

"Inconsistency: no SDNode available for this VPIntrinsic!");

+ if (*ResOPC == ISD::VP_REDUCE_SEQ_FADD ||

+ *ResOPC == ISD::VP_REDUCE_SEQ_FMUL) {

+ if (VPIntrin.getFastMathFlags().allowReassoc())

+ return *ResOPC == ISD::VP_REDUCE_SEQ_FADD ? ISD::VP_REDUCE_FADD

+ : ISD::VP_REDUCE_FMUL;

+ }

return ResOPC.getValue();

}

+void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,

+ SmallVector<SDValue, 7> &OpValues,

+ bool isGather) {

+ SDLoc DL = getCurSDLoc();

+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();

+ Value *PtrOperand = VPIntrin.getArgOperand(0);

+ MaybeAlign Alignment = DAG.getEVTAlign(VT);

+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();

+ const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);

+ SDValue LD;

+ bool AddToChain = true;

+ if (!isGather) {

+ // Do not serialize variable-length loads of constant memory with

+ // anything.

+ MemoryLocation ML;

+ if (VT.isScalableVector())

+ ML = MemoryLocation::getAfter(PtrOperand);

+ else

+ ML = MemoryLocation(

+ PtrOperand,

+ LocationSize::precise(

+ DAG.getDataLayout().getTypeStoreSize(VPIntrin.getType())),

+ AAInfo);

+ AddToChain = !AA || !AA->pointsToConstantMemory(ML);

+ SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();

+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(

+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,

+ VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges);

+ LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],

+ MMO, false /*IsExpanding */);

+ } else {

+ unsigned AS =

+ PtrOperand->getType()->getScalarType()->getPointerAddressSpace();

+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(

+ MachinePointerInfo(AS), MachineMemOperand::MOLoad,

+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);

+ SDValue Base, Index, Scale;

+ ISD::MemIndexType IndexType;

+ bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,

+ this, VPIntrin.getParent());

+ if (!UniformBase) {

+ Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));

+ Index = getValue(PtrOperand);

+ IndexType = ISD::SIGNED_UNSCALED;

+ Scale =

+ DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));

+ }

+ EVT IdxVT = Index.getValueType();

+ EVT EltTy = IdxVT.getVectorElementType();

+ if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {

+ EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);

+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);

+ }

+ LD = DAG.getGatherVP(

+ DAG.getVTList(VT, MVT::Other), VT, DL,

+ {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO,

+ IndexType);

+ }

+ if (AddToChain)

+ PendingLoads.push_back(LD.getValue(1));

+ setValue(&VPIntrin, LD);

+void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin,

+ SmallVector<SDValue, 7> &OpValues,

+ bool isScatter) {

+ SDLoc DL = getCurSDLoc();

+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();

+ Value *PtrOperand = VPIntrin.getArgOperand(1);

+ EVT VT = OpValues[0].getValueType();

+ MaybeAlign Alignment = DAG.getEVTAlign(VT);

+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();

+ SDValue ST;

+ if (!isScatter) {

+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(

+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,

+ VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo);

+ ST =

+ DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], OpValues[1],

+ OpValues[2], OpValues[3], MMO, false /* IsTruncating */);

+ } else {

+ unsigned AS =

+ PtrOperand->getType()->getScalarType()->getPointerAddressSpace();

+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(

+ MachinePointerInfo(AS), MachineMemOperand::MOStore,

+ MemoryLocation::UnknownSize, *Alignment, AAInfo);

+ SDValue Base, Index, Scale;

+ ISD::MemIndexType IndexType;

+ bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,

+ this, VPIntrin.getParent());

+ if (!UniformBase) {

+ Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));

+ Index = getValue(PtrOperand);

+ IndexType = ISD::SIGNED_UNSCALED;

+ Scale =

+ DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));

+ }

+ EVT IdxVT = Index.getValueType();

+ EVT EltTy = IdxVT.getVectorElementType();

+ if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {

+ EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);

+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);

+ }

+ ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL,

+ {getMemoryRoot(), OpValues[0], Base, Index, Scale,

+ OpValues[2], OpValues[3]},

+ MMO, IndexType);

+ }

+ DAG.setRoot(ST);

+ setValue(&VPIntrin, ST);

void SelectionDAGBuilder::visitVectorPredicationIntrinsic(

const VPIntrinsic &VPIntrin) {

SDLoc DL = getCurSDLoc();

@@ -7349,15 +7465,29 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(

// Request operands.

SmallVector<SDValue, 7> OpValues;

- for (unsigned I = 0; I < VPIntrin.getNumArgOperands(); ++I) {

+ for (unsigned I = 0; I < VPIntrin.arg_size(); ++I) {

auto Op = getValue(VPIntrin.getArgOperand(I));

if (I == EVLParamPos)

Op = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, Op);

OpValues.push_back(Op);

}

- SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);

- setValue(&VPIntrin, Result);

+ switch (Opcode) {

+ default: {

+ SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);

+ setValue(&VPIntrin, Result);

+ break;

+ }

+ case ISD::VP_LOAD:

+ case ISD::VP_GATHER:

+ visitVPLoadGather(VPIntrin, ValueVTs[0], OpValues,

+ Opcode == ISD::VP_GATHER);

+ break;

+ case ISD::VP_STORE:

+ case ISD::VP_SCATTER:

+ visitVPStoreScatter(VPIntrin, OpValues, Opcode == ISD::VP_SCATTER);

+ break;

+ }

}

SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain,

@@ -7760,12 +7890,11 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {

// because the return pointer needs to be adjusted by the size of

// the copied memory.

SDValue Root = isVol ? getRoot() : getMemoryRoot();

- AAMDNodes AAInfo;

- I.getAAMetadata(AAInfo);

SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false,

/*isTailCall=*/false,

MachinePointerInfo(I.getArgOperand(0)),

- MachinePointerInfo(I.getArgOperand(1)), AAInfo);

+ MachinePointerInfo(I.getArgOperand(1)),

+ I.getAAMetadata());

assert(MC.getNode() != nullptr &&

"** memcpy should not be lowered as TailCall in mempcpy context **");

DAG.setRoot(MC);

@@ -7918,6 +8047,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {

}

if (Function *F = I.getCalledFunction()) {

+ diagnoseDontCall(I);

if (F->isDeclaration()) {

// Is this an LLVM intrinsic or a target-specific intrinsic?

unsigned IID = F->getIntrinsicID();

@@ -8176,7 +8307,7 @@ public:

}

- return TLI.getValueType(DL, OpTy, true);

+ return TLI.getAsmOperandValueType(DL, OpTy, true);

}

};

@@ -8261,9 +8392,10 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,

///

/// OpInfo describes the operand

/// RefOpInfo describes the matching operand if any, the operand otherwise

-static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,

- SDISelAsmOperandInfo &OpInfo,

- SDISelAsmOperandInfo &RefOpInfo) {

+static llvm::Optional<unsigned>

+getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,

+ SDISelAsmOperandInfo &OpInfo,

+ SDISelAsmOperandInfo &RefOpInfo) {

LLVMContext &Context = *DAG.getContext();

const TargetLowering &TLI = DAG.getTargetLoweringInfo();

@@ -8273,7 +8405,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,

// No work to do for memory operations.

if (OpInfo.ConstraintType == TargetLowering::C_Memory)

- return;

+ return None;

// If this is a constraint for a single physreg, or a constraint for a

// register class, find it.

@@ -8283,7 +8415,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,

&TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);

// RC is unset only on failure. Return immediately.

if (!RC)

- return;

+ return None;

// Get the actual register value type. This is important, because the user

// may have asked for (e.g.) the AX register in i32 type. We need to

@@ -8328,7 +8460,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,

// No need to allocate a matching input constraint since the constraint it's

// matching to has already been allocated.

if (OpInfo.isMatchingInputConstraint())

- return;

+ return None;

EVT ValueVT = OpInfo.ConstraintVT;

if (OpInfo.ConstraintVT == MVT::Other)

@@ -8351,8 +8483,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,

// Do not check for single registers.

if (AssignedReg) {

- for (; *I != AssignedReg; ++I)

- assert(I != RC->end() && "AssignedReg should be member of RC");

+ I = std::find(I, RC->end(), AssignedReg);

+ if (I == RC->end()) {

+ // RC does not contain the selected register, which indicates a

+ // mismatch between the register and the required type/bitwidth.

+ return {AssignedReg};

+ }

}

for (; NumRegs; --NumRegs, ++I) {

@@ -8362,6 +8498,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,

}

OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);

+ return None;

}

static unsigned

@@ -8452,12 +8589,12 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,

// Process the call argument. BasicBlocks are labels, currently appearing

// only in asm's.

if (isa<CallBrInst>(Call) &&

- ArgNo - 1 >= (cast<CallBrInst>(&Call)->getNumArgOperands() -

+ ArgNo - 1 >= (cast<CallBrInst>(&Call)->arg_size() -

cast<CallBrInst>(&Call)->getNumIndirectDests() -

NumMatchingOps) &&

(NumMatchingOps == 0 ||

- ArgNo - 1 < (cast<CallBrInst>(&Call)->getNumArgOperands() -

- NumMatchingOps))) {

+ ArgNo - 1 <

+ (cast<CallBrInst>(&Call)->arg_size() - NumMatchingOps))) {

const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal);

EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true);

OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT);

@@ -8479,8 +8616,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,

DAG.getDataLayout(), STy->getElementType(ResNo));

} else {

assert(ResNo == 0 && "Asm only has one result!");

- OpInfo.ConstraintVT =

- TLI.getSimpleValueType(DAG.getDataLayout(), Call.getType());

+ OpInfo.ConstraintVT = TLI.getAsmOperandValueType(

+ DAG.getDataLayout(), Call.getType()).getSimpleVT();

}

++ResNo;

} else {

@@ -8595,7 +8732,18 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,

OpInfo.isMatchingInputConstraint()

? ConstraintOperands[OpInfo.getMatchedOperand()]

: OpInfo;

- GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);

+ const auto RegError =

+ getRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);

+ if (RegError.hasValue()) {

+ const MachineFunction &MF = DAG.getMachineFunction();

+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();

+ const char *RegName = TRI.getName(RegError.getValue());

+ emitInlineAsmError(Call, "register '" + Twine(RegName) +

+ "' allocated for constraint '" +

+ Twine(OpInfo.ConstraintCode) +

+ "' does not match required type");

+ return;

+ }

auto DetectWriteToReservedRegister = [&]() {

const MachineFunction &MF = DAG.getMachineFunction();

@@ -8674,11 +8822,13 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,

MachineFunction &MF = DAG.getMachineFunction();

MachineRegisterInfo &MRI = MF.getRegInfo();

const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();

- RegisterSDNode *R = dyn_cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]);

+ auto *R = cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]);

MVT RegVT = R->getSimpleValueType(0);

- const TargetRegisterClass *RC = TiedReg.isVirtual() ?

- MRI.getRegClass(TiedReg) : TRI.getMinimalPhysRegClass(TiedReg);

+ const TargetRegisterClass *RC =

+ TiedReg.isVirtual() ? MRI.getRegClass(TiedReg)

+ : RegVT != MVT::Untyped ? TLI.getRegClassFor(RegVT)

+ : TRI.getMinimalPhysRegClass(TiedReg);

unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);

for (unsigned i = 0; i != NumRegs; ++i)

Regs.push_back(MRI.createVirtualRegister(RC));

@@ -9317,7 +9467,7 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,

const TargetLowering &TLI = DAG.getTargetLoweringInfo();

SDValue Op1 = getValue(I.getArgOperand(0));

SDValue Op2;

- if (I.getNumArgOperands() > 1)

+ if (I.arg_size() > 1)

Op2 = getValue(I.getArgOperand(1));

SDLoc dl = getCurSDLoc();

EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());

@@ -9671,9 +9821,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {

// if it isn't first piece, alignment must be 1

// For scalable vectors the scalable part is currently handled

// by individual targets, so we just use the known minimum size here.

- ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT,

- i < CLI.NumFixedArgs, i,

- j*Parts[j].getValueType().getStoreSize().getKnownMinSize());

+ ISD::OutputArg MyFlags(

+ Flags, Parts[j].getValueType().getSimpleVT(), VT,

+ i < CLI.NumFixedArgs, i,

+ j * Parts[j].getValueType().getStoreSize().getKnownMinSize());

if (NumParts > 1 && j == 0)

MyFlags.Flags.setSplit();

else if (j != 0) {

@@ -9841,10 +9992,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {

None); // This is not an ABI copy.

SDValue Chain = DAG.getEntryNode();

- ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==

- FuncInfo.PreferredExtendType.end())

- ? ISD::ANY_EXTEND

- : FuncInfo.PreferredExtendType[V];

+ ISD::NodeType ExtendType = ISD::ANY_EXTEND;

+ auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V);

+ if (PreferredExtendIt != FuncInfo.PreferredExtendType.end())

+ ExtendType = PreferredExtendIt->second;

RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);

PendingExports.push_back(Chain);

}

@@ -10490,27 +10641,6 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {

ConstantsOut.clear();

}

-/// Add a successor MBB to ParentMBB< creating a new MachineBB for BB if SuccMBB

-/// is 0.

-MachineBasicBlock *

-SelectionDAGBuilder::StackProtectorDescriptor::

-AddSuccessorMBB(const BasicBlock *BB,

- MachineBasicBlock *ParentMBB,

- bool IsLikely,

- MachineBasicBlock *SuccMBB) {

- // If SuccBB has not been created yet, create it.

- if (!SuccMBB) {

- MachineFunction *MF = ParentMBB->getParent();

- MachineFunction::iterator BBI(ParentMBB);

- SuccMBB = MF->CreateMachineBasicBlock(BB);

- MF->insert(++BBI, SuccMBB);

- }

- // Add it as a successor of ParentMBB.

- ParentMBB->addSuccessor(

- SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely));

- return SuccMBB;

MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {

MachineFunction::iterator I(MBB);

if (++I == FuncInfo.MF->end())

@@ -10675,12 +10805,10 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,

}

- if (FallthroughUnreachable) {

- // Skip the range check if the fallthrough block is unreachable.

- JTH->OmitRangeCheck = true;

- }

+ if (FallthroughUnreachable)

+ JTH->FallthroughUnreachable = true;

- if (!JTH->OmitRangeCheck)

+ if (!JTH->FallthroughUnreachable)

addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);

addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);

CurMBB->normalizeSuccProbs();

@@ -10718,10 +10846,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,

BTB->DefaultProb -= DefaultProb / 2;

}

- if (FallthroughUnreachable) {

- // Skip the range check if the fallthrough block is unreachable.

- BTB->OmitRangeCheck = true;

- }

+ if (FallthroughUnreachable)

+ BTB->FallthroughUnreachable = true;

// If we're in the right place, emit the bit test header right now.

if (CurMBB == SwitchMBB) {

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index df5be156821f..d6122aa0a739 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h

@@ -18,6 +18,7 @@

#include "llvm/ADT/DenseMap.h"

#include "llvm/ADT/MapVector.h"

#include "llvm/ADT/SmallVector.h"

+#include "llvm/CodeGen/CodeGenCommonISel.h"

#include "llvm/CodeGen/ISDOpcodes.h"

#include "llvm/CodeGen/SelectionDAGNodes.h"

#include "llvm/CodeGen/SwitchLoweringUtils.h"

@@ -180,204 +181,6 @@ private:

SwitchCG::CaseClusterVector &Clusters,

BranchProbability &PeeledCaseProb);

- /// A class which encapsulates all of the information needed to generate a

- /// stack protector check and signals to isel via its state being initialized

- /// that a stack protector needs to be generated.

- ///

- /// *NOTE* The following is a high level documentation of SelectionDAG Stack

- /// Protector Generation. The reason that it is placed here is for a lack of

- /// other good places to stick it.

- ///

- /// High Level Overview of SelectionDAG Stack Protector Generation:

- ///

- /// Previously, generation of stack protectors was done exclusively in the

- /// pre-SelectionDAG Codegen LLVM IR Pass "Stack Protector". This necessitated

- /// splitting basic blocks at the IR level to create the success/failure basic

- /// blocks in the tail of the basic block in question. As a result of this,

- /// calls that would have qualified for the sibling call optimization were no

- /// longer eligible for optimization since said calls were no longer right in

- /// the "tail position" (i.e. the immediate predecessor of a ReturnInst

- /// instruction).

- ///

- /// Then it was noticed that since the sibling call optimization causes the

- /// callee to reuse the caller's stack, if we could delay the generation of

- /// the stack protector check until later in CodeGen after the sibling call

- /// decision was made, we get both the tail call optimization and the stack

- /// protector check!

- ///

- /// A few goals in solving this problem were:

- ///

- /// 1. Preserve the architecture independence of stack protector generation.

- ///

- /// 2. Preserve the normal IR level stack protector check for platforms like

- /// OpenBSD for which we support platform-specific stack protector

- /// generation.

- ///

- /// The main problem that guided the present solution is that one can not

- /// solve this problem in an architecture independent manner at the IR level

- /// only. This is because:

- ///

- /// 1. The decision on whether or not to perform a sibling call on certain

- /// platforms (for instance i386) requires lower level information

- /// related to available registers that can not be known at the IR level.

- ///

- /// 2. Even if the previous point were not true, the decision on whether to

- /// perform a tail call is done in LowerCallTo in SelectionDAG which

- /// occurs after the Stack Protector Pass. As a result, one would need to

- /// put the relevant callinst into the stack protector check success

- /// basic block (where the return inst is placed) and then move it back

- /// later at SelectionDAG/MI time before the stack protector check if the

- /// tail call optimization failed. The MI level option was nixed

- /// immediately since it would require platform-specific pattern

- /// matching. The SelectionDAG level option was nixed because

- /// SelectionDAG only processes one IR level basic block at a time

- /// implying one could not create a DAG Combine to move the callinst.

- ///

- /// To get around this problem a few things were realized:

- ///

- /// 1. While one can not handle multiple IR level basic blocks at the

- /// SelectionDAG Level, one can generate multiple machine basic blocks

- /// for one IR level basic block. This is how we handle bit tests and

- /// switches.

- ///

- /// 2. At the MI level, tail calls are represented via a special return

- /// MIInst called "tcreturn". Thus if we know the basic block in which we

- /// wish to insert the stack protector check, we get the correct behavior

- /// by always inserting the stack protector check right before the return

- /// statement. This is a "magical transformation" since no matter where

- /// the stack protector check intrinsic is, we always insert the stack

- /// protector check code at the end of the BB.

- ///

- /// Given the aforementioned constraints, the following solution was devised:

- ///

- /// 1. On platforms that do not support SelectionDAG stack protector check

- /// generation, allow for the normal IR level stack protector check

- /// generation to continue.

- ///

- /// 2. On platforms that do support SelectionDAG stack protector check

- /// generation:

- ///

- /// a. Use the IR level stack protector pass to decide if a stack

- /// protector is required/which BB we insert the stack protector check

- /// in by reusing the logic already therein. If we wish to generate a

- /// stack protector check in a basic block, we place a special IR

- /// intrinsic called llvm.stackprotectorcheck right before the BB's

- /// returninst or if there is a callinst that could potentially be

- /// sibling call optimized, before the call inst.

- ///

- /// b. Then when a BB with said intrinsic is processed, we codegen the BB

- /// normally via SelectBasicBlock. In said process, when we visit the

- /// stack protector check, we do not actually emit anything into the

- /// BB. Instead, we just initialize the stack protector descriptor

- /// class (which involves stashing information/creating the success

- /// mbbb and the failure mbb if we have not created one for this

- /// function yet) and export the guard variable that we are going to

- /// compare.

- ///

- /// c. After we finish selecting the basic block, in FinishBasicBlock if

- /// the StackProtectorDescriptor attached to the SelectionDAGBuilder is

- /// initialized, we produce the validation code with one of these

- /// techniques:

- /// 1) with a call to a guard check function

- /// 2) with inlined instrumentation

- ///

- /// 1) We insert a call to the check function before the terminator.

- ///

- /// 2) We first find a splice point in the parent basic block

- /// before the terminator and then splice the terminator of said basic

- /// block into the success basic block. Then we code-gen a new tail for

- /// the parent basic block consisting of the two loads, the comparison,

- /// and finally two branches to the success/failure basic blocks. We

- /// conclude by code-gening the failure basic block if we have not

- /// code-gened it already (all stack protector checks we generate in

- /// the same function, use the same failure basic block).

- class StackProtectorDescriptor {

- public:

- StackProtectorDescriptor() = default;

- /// Returns true if all fields of the stack protector descriptor are

- /// initialized implying that we should/are ready to emit a stack protector.

- bool shouldEmitStackProtector() const {

- return ParentMBB && SuccessMBB && FailureMBB;

- }

- bool shouldEmitFunctionBasedCheckStackProtector() const {

- return ParentMBB && !SuccessMBB && !FailureMBB;

- }

- /// Initialize the stack protector descriptor structure for a new basic

- /// block.

- void initialize(const BasicBlock *BB, MachineBasicBlock *MBB,

- bool FunctionBasedInstrumentation) {

- // Make sure we are not initialized yet.

- assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is "

- "already initialized!");

- ParentMBB = MBB;

- if (!FunctionBasedInstrumentation) {

- SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true);

- FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB);

- }

- /// Reset state that changes when we handle different basic blocks.

- ///

- /// This currently includes:

- ///

- /// 1. The specific basic block we are generating a

- /// stack protector for (ParentMBB).

- ///

- /// 2. The successor machine basic block that will contain the tail of

- /// parent mbb after we create the stack protector check (SuccessMBB). This

- /// BB is visited only on stack protector check success.

- void resetPerBBState() {

- ParentMBB = nullptr;

- SuccessMBB = nullptr;

- }

- /// Reset state that only changes when we switch functions.

- ///

- /// This currently includes:

- ///

- /// 1. FailureMBB since we reuse the failure code path for all stack

- /// protector checks created in an individual function.

- ///

- /// 2.The guard variable since the guard variable we are checking against is

- /// always the same.

- void resetPerFunctionState() {

- FailureMBB = nullptr;

- }

- MachineBasicBlock *getParentMBB() { return ParentMBB; }

- MachineBasicBlock *getSuccessMBB() { return SuccessMBB; }

- MachineBasicBlock *getFailureMBB() { return FailureMBB; }

- private:

- /// The basic block for which we are generating the stack protector.

- ///

- /// As a result of stack protector generation, we will splice the

- /// terminators of this basic block into the successor mbb SuccessMBB and

- /// replace it with a compare/branch to the successor mbbs

- /// SuccessMBB/FailureMBB depending on whether or not the stack protector

- /// was violated.

- MachineBasicBlock *ParentMBB = nullptr;

- /// A basic block visited on stack protector check success that contains the

- /// terminators of ParentMBB.

- MachineBasicBlock *SuccessMBB = nullptr;

- /// This basic block visited on stack protector check failure that will

- /// contain a call to __stack_chk_fail().

- MachineBasicBlock *FailureMBB = nullptr;

- /// Add a successor machine basic block to ParentMBB. If the successor mbb

- /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic

- /// block will be created. Assign a large weight if IsLikely is true.

- MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB,

- MachineBasicBlock *ParentMBB,

- bool IsLikely,

- MachineBasicBlock *SuccMBB = nullptr);

- };

private:

const TargetMachine &TM;

@@ -764,6 +567,10 @@ private:

void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);

void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);

void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);

+ void visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,

+ SmallVector<SDValue, 7> &OpValues, bool isGather);

+ void visitVPStoreScatter(const VPIntrinsic &VPIntrin,

+ SmallVector<SDValue, 7> &OpValues, bool isScatter);

void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin);

void visitVAStart(const CallInst &I);

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 40083c614a6c..77e9e53668f9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp

@@ -146,9 +146,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {

unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();

if (IID < Intrinsic::num_intrinsics)

return Intrinsic::getBaseName((Intrinsic::ID)IID).str();

- else if (!G)

+ if (!G)

return "Unknown intrinsic";

- else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())

+ if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())

return TII->getName(IID);

llvm_unreachable("Invalid intrinsic ID");

}

@@ -526,13 +526,13 @@ static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO,

if (G) {

const MachineFunction *MF = &G->getMachineFunction();

return printMemOperand(OS, MMO, MF, MF->getFunction().getParent(),

- &MF->getFrameInfo(), G->getSubtarget().getInstrInfo(),

- *G->getContext());

- } else {

- LLVMContext Ctx;

- return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr,

- /*MFI=*/nullptr, /*TII=*/nullptr, Ctx);

+ &MF->getFrameInfo(),

+ G->getSubtarget().getInstrInfo(), *G->getContext());

}

+ LLVMContext Ctx;

+ return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr,

+ /*MFI=*/nullptr, /*TII=*/nullptr, Ctx);

}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

@@ -948,17 +948,19 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G,

if (!Value.getNode()) {

OS << "<null>";

return false;

- } else if (shouldPrintInline(*Value.getNode(), G)) {

+ }

+ if (shouldPrintInline(*Value.getNode(), G)) {

OS << Value->getOperationName(G) << ':';

Value->print_types(OS, G);

Value->print_details(OS, G);

return true;

- } else {

- OS << PrintNodeId(*Value.getNode());

- if (unsigned RN = Value.getResNo())

- OS << ':' << RN;

- return false;

}

+ OS << PrintNodeId(*Value.getNode());

+ if (unsigned RN = Value.getResNo())

+ OS << ':' << RN;

+ return false;

}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

@@ -1012,15 +1014,12 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,

N->print(OS, G);

- if (depth < 1)

- return;

for (const SDValue &Op : N->op_values()) {

// Don't follow chain operands.

if (Op.getValueType() == MVT::Other)

continue;

OS << '\n';

- printrWithDepthHelper(OS, Op.getNode(), G, depth-1, indent+2);

+ printrWithDepthHelper(OS, Op.getNode(), G, depth - 1, indent + 2);

}

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 1415cce3b1df..c7e37cf8ca14 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp

@@ -33,6 +33,7 @@

#include "llvm/Analysis/ProfileSummaryInfo.h"

#include "llvm/Analysis/TargetLibraryInfo.h"

#include "llvm/Analysis/TargetTransformInfo.h"

+#include "llvm/CodeGen/CodeGenCommonISel.h"

#include "llvm/CodeGen/FastISel.h"

#include "llvm/CodeGen/FunctionLoweringInfo.h"

#include "llvm/CodeGen/GCMetadata.h"

@@ -575,7 +576,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {

LiveInMap.insert(LI);

// Insert DBG_VALUE instructions for function arguments to the entry block.

- bool InstrRef = TM.Options.ValueTrackingVariableLocations;

+ bool InstrRef = MF->useDebugInstrRef();

for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {

MachineInstr *MI = FuncInfo->ArgDbgValues[e - i - 1];

assert(MI->getOpcode() != TargetOpcode::DBG_VALUE_LIST &&

@@ -699,7 +700,7 @@ static void reportFastISelFailure(MachineFunction &MF,

R << (" (in function: " + MF.getName() + ")").str();

if (ShouldAbort)

- report_fatal_error(R.getMsg());

+ report_fatal_error(Twine(R.getMsg()));

ORE.emit(R);

}

@@ -798,7 +799,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {

#ifndef NDEBUG

if (TTI.hasBranchDivergence())

- CurDAG->VerifyDAGDiverence();

+ CurDAG->VerifyDAGDivergence();

#endif

if (ViewDAGCombine1 && MatchFilterBB)

@@ -818,7 +819,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {

#ifndef NDEBUG

if (TTI.hasBranchDivergence())

- CurDAG->VerifyDAGDiverence();

+ CurDAG->VerifyDAGDivergence();

#endif

// Second step, hack on the DAG until it only uses operations and types that

@@ -840,7 +841,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {

#ifndef NDEBUG

if (TTI.hasBranchDivergence())

- CurDAG->VerifyDAGDiverence();

+ CurDAG->VerifyDAGDivergence();

#endif

// Only allow creation of legal node types.

@@ -864,7 +865,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {

#ifndef NDEBUG

if (TTI.hasBranchDivergence())

- CurDAG->VerifyDAGDiverence();

+ CurDAG->VerifyDAGDivergence();

#endif

}

@@ -882,7 +883,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {

#ifndef NDEBUG

if (TTI.hasBranchDivergence())

- CurDAG->VerifyDAGDiverence();

+ CurDAG->VerifyDAGDivergence();

#endif

{

@@ -898,7 +899,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {

#ifndef NDEBUG

if (TTI.hasBranchDivergence())

- CurDAG->VerifyDAGDiverence();

+ CurDAG->VerifyDAGDivergence();

#endif

if (ViewDAGCombineLT && MatchFilterBB)

@@ -918,7 +919,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {

#ifndef NDEBUG

if (TTI.hasBranchDivergence())

- CurDAG->VerifyDAGDiverence();

+ CurDAG->VerifyDAGDivergence();

#endif

}

@@ -938,7 +939,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {

#ifndef NDEBUG

if (TTI.hasBranchDivergence())

- CurDAG->VerifyDAGDiverence();

+ CurDAG->VerifyDAGDivergence();

#endif

if (ViewDAGCombine2 && MatchFilterBB)

@@ -958,7 +959,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {

#ifndef NDEBUG

if (TTI.hasBranchDivergence())

- CurDAG->VerifyDAGDiverence();

+ CurDAG->VerifyDAGDivergence();

#endif

if (OptLevel != CodeGenOpt::None)

@@ -1045,25 +1046,25 @@ public:

} // end anonymous namespace

// This function is used to enforce the topological node id property

-// property leveraged during Instruction selection. Before selection all

-// nodes are given a non-negative id such that all nodes have a larger id than

+// leveraged during instruction selection. Before the selection process all

+// nodes are given a non-negative id such that all nodes have a greater id than

// their operands. As this holds transitively we can prune checks that a node N

// is a predecessor of M another by not recursively checking through M's

-// operands if N's ID is larger than M's ID. This is significantly improves

-// performance of for various legality checks (e.g. IsLegalToFold /

-// UpdateChains).

+// operands if N's ID is larger than M's ID. This significantly improves

+// performance of various legality checks (e.g. IsLegalToFold / UpdateChains).

-// However, when we fuse multiple nodes into a single node

-// during selection we may induce a predecessor relationship between inputs and

-// outputs of distinct nodes being merged violating the topological property.

-// Should a fused node have a successor which has yet to be selected, our

-// legality checks would be incorrect. To avoid this we mark all unselected

-// sucessor nodes, i.e. id != -1 as invalid for pruning by bit-negating (x =>

+// However, when we fuse multiple nodes into a single node during the

+// selection we may induce a predecessor relationship between inputs and

+// outputs of distinct nodes being merged, violating the topological property.

+// Should a fused node have a successor which has yet to be selected,

+// our legality checks would be incorrect. To avoid this we mark all unselected

+// successor nodes, i.e. id != -1, as invalid for pruning by bit-negating (x =>

// (-(x+1))) the ids and modify our pruning check to ignore negative Ids of M.

// We use bit-negation to more clearly enforce that node id -1 can only be

-// achieved by selected nodes). As the conversion is reversable the original Id,

-// topological pruning can still be leveraged when looking for unselected nodes.

-// This method is call internally in all ISel replacement calls.

+// achieved by selected nodes. As the conversion is reversible to the original

+// Id, topological pruning can still be leveraged when looking for unselected

+// nodes. This method is called internally in all ISel replacement related

+// functions.

void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) {

SmallVector<SDNode *, 4> Nodes;

Nodes.push_back(Node);

@@ -1080,7 +1081,7 @@ void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) {

}

-// InvalidateNodeId - As discusses in EnforceNodeIdInvariant, mark a

+// InvalidateNodeId - As explained in EnforceNodeIdInvariant, mark a

// NodeId with the equivalent node id which is invalid for topological

// pruning.

void SelectionDAGISel::InvalidateNodeId(SDNode *N) {

@@ -1226,7 +1227,10 @@ static void mapWasmLandingPadIndex(MachineBasicBlock *MBB,

bool IsSingleCatchAllClause =

CPI->getNumArgOperands() == 1 &&

cast<Constant>(CPI->getArgOperand(0))->isNullValue();

- if (!IsSingleCatchAllClause) {

+ // catchpads for longjmp use an empty type list, e.g. catchpad within %0 []

+ // and they don't need LSDA info

+ bool IsCatchLongjmp = CPI->getNumArgOperands() == 0;

+ if (!IsSingleCatchAllClause && !IsCatchLongjmp) {

// Create a mapping from landing pad label to landing pad index.

bool IntrFound = false;

for (const User *U : CPI->users()) {

@@ -1644,114 +1648,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {

SDB->SPDescriptor.resetPerFunctionState();

}

-/// Given that the input MI is before a partial terminator sequence TSeq, return

-/// true if M + TSeq also a partial terminator sequence.

-///

-/// A Terminator sequence is a sequence of MachineInstrs which at this point in

-/// lowering copy vregs into physical registers, which are then passed into

-/// terminator instructors so we can satisfy ABI constraints. A partial

-/// terminator sequence is an improper subset of a terminator sequence (i.e. it

-/// may be the whole terminator sequence).

-static bool MIIsInTerminatorSequence(const MachineInstr &MI) {

- // If we do not have a copy or an implicit def, we return true if and only if

- // MI is a debug value.

- if (!MI.isCopy() && !MI.isImplicitDef())

- // Sometimes DBG_VALUE MI sneak in between the copies from the vregs to the

- // physical registers if there is debug info associated with the terminator

- // of our mbb. We want to include said debug info in our terminator

- // sequence, so we return true in that case.

- return MI.isDebugValue();

- // We have left the terminator sequence if we are not doing one of the

- // following:

- //

- // 1. Copying a vreg into a physical register.

- // 2. Copying a vreg into a vreg.

- // 3. Defining a register via an implicit def.

- // OPI should always be a register definition...

- MachineInstr::const_mop_iterator OPI = MI.operands_begin();

- if (!OPI->isReg() || !OPI->isDef())

- return false;

- // Defining any register via an implicit def is always ok.

- if (MI.isImplicitDef())

- return true;

- // Grab the copy source...

- MachineInstr::const_mop_iterator OPI2 = OPI;

- ++OPI2;

- assert(OPI2 != MI.operands_end()

- && "Should have a copy implying we should have 2 arguments.");

- // Make sure that the copy dest is not a vreg when the copy source is a

- // physical register.

- if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) &&

- Register::isPhysicalRegister(OPI2->getReg())))

- return false;

- return true;

-/// Find the split point at which to splice the end of BB into its success stack

-/// protector check machine basic block.

-///

-/// On many platforms, due to ABI constraints, terminators, even before register

-/// allocation, use physical registers. This creates an issue for us since

-/// physical registers at this point can not travel across basic

-/// blocks. Luckily, selectiondag always moves physical registers into vregs

-/// when they enter functions and moves them through a sequence of copies back

-/// into the physical registers right before the terminator creating a

-/// ``Terminator Sequence''. This function is searching for the beginning of the

-/// terminator sequence so that we can ensure that we splice off not just the

-/// terminator, but additionally the copies that move the vregs into the

-/// physical registers.

-static MachineBasicBlock::iterator

-FindSplitPointForStackProtector(MachineBasicBlock *BB,

- const TargetInstrInfo &TII) {

- MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();

- if (SplitPoint == BB->begin())

- return SplitPoint;

- MachineBasicBlock::iterator Start = BB->begin();

- MachineBasicBlock::iterator Previous = SplitPoint;

- --Previous;

- if (TII.isTailCall(*SplitPoint) &&

- Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) {

- // call itself, then we must insert before the sequence even starts. For

- // example:

- // <split point>

- // ADJCALLSTACKDOWN ...

- // <Moves>

- // ADJCALLSTACKUP ...

- // TAILJMP somewhere

- // On the other hand, it could be an unrelated call in which case this tail call

- // has to register moves of its own and should be the split point. For example:

- // ADJCALLSTACKDOWN

- // CALL something_else

- // ADJCALLSTACKUP

- // <split point>

- // TAILJMP somewhere

- do {

- --Previous;

- if (Previous->isCall())

- return SplitPoint;

- } while(Previous->getOpcode() != TII.getCallFrameSetupOpcode());

- return Previous;

- }

- while (MIIsInTerminatorSequence(*Previous)) {

- SplitPoint = Previous;

- if (Previous == Start)

- break;

- --Previous;

- }

- return SplitPoint;

void

SelectionDAGISel::FinishBasicBlock() {

LLVM_DEBUG(dbgs() << "Total amount of phi nodes to update: "

@@ -1781,7 +1677,7 @@ SelectionDAGISel::FinishBasicBlock() {

// Add load and check to the basicblock.

FuncInfo->MBB = ParentMBB;

FuncInfo->InsertPt =

- FindSplitPointForStackProtector(ParentMBB, *TII);

+ findSplitPointForStackProtector(ParentMBB, *TII);

SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);

CurDAG->setRoot(SDB->getRoot());

SDB->clear();

@@ -1800,7 +1696,7 @@ SelectionDAGISel::FinishBasicBlock() {

// register allocation issues caused by us splitting the parent mbb. The

// register allocator will clean up said virtual copies later on.

MachineBasicBlock::iterator SplitPoint =

- FindSplitPointForStackProtector(ParentMBB, *TII);

+ findSplitPointForStackProtector(ParentMBB, *TII);

// Splice the terminator of ParentMBB into SuccessMBB.

SuccessMBB->splice(SuccessMBB->end(), ParentMBB,

@@ -1861,9 +1757,9 @@ SelectionDAGISel::FinishBasicBlock() {

// test, and delete the last bit test.

MachineBasicBlock *NextMBB;

- if (BTB.ContiguousRange && j + 2 == ej) {

- // Second-to-last bit-test with contiguous range: fall through to the

- // target of the final bit test.

+ if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {

+ // Second-to-last bit-test with contiguous range or omitted range

+ // check: fall through to the target of the final bit test.

NextMBB = BTB.Cases[j + 1].TargetBB;

} else if (j + 1 == ej) {

// For the last bit test, fall through to Default.

@@ -1880,7 +1776,7 @@ SelectionDAGISel::FinishBasicBlock() {

SDB->clear();

CodeGenAndEmitDAG();

- if (BTB.ContiguousRange && j + 2 == ej) {

+ if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {

// Since we're not going to use the final bit test, remove it.

BTB.Cases.pop_back();

break;

@@ -3800,7 +3696,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) {

else

Msg << "unknown intrinsic #" << iid;

}

- report_fatal_error(Msg.str());

+ report_fatal_error(Twine(Msg.str()));

}

char SelectionDAGISel::ID = 0;

diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index a903c2401264..e2db9633bfb9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp

@@ -1119,7 +1119,7 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(

StatepointLoweringInfo SI(DAG);

unsigned ArgBeginIndex = Call->arg_begin() - Call->op_begin();

populateCallLoweringInfo(

- SI.CLI, Call, ArgBeginIndex, Call->getNumArgOperands(), Callee,

+ SI.CLI, Call, ArgBeginIndex, Call->arg_size(), Callee,

ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : Call->getType(),

false);

if (!VarArgDisallowed)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 1c1dae8f953f..e4a69adff05b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

@@ -26,6 +26,7 @@

#include "llvm/IR/LLVMContext.h"

#include "llvm/MC/MCAsmInfo.h"

#include "llvm/MC/MCExpr.h"

+#include "llvm/Support/DivisionByConstantInfo.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Support/KnownBits.h"

#include "llvm/Support/MathExtras.h"

@@ -537,7 +538,7 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op,

TargetLoweringOpt &TLO) const {

EVT VT = Op.getValueType();

APInt DemandedElts = VT.isVector()

- ? APInt::getAllOnesValue(VT.getVectorNumElements())

+ ? APInt::getAllOnes(VT.getVectorNumElements())

: APInt(1, 1);

return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);

}

@@ -621,7 +622,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,

}

APInt DemandedElts = VT.isVector()

- ? APInt::getAllOnesValue(VT.getVectorNumElements())

+ ? APInt::getAllOnes(VT.getVectorNumElements())

: APInt(1, 1);

return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,

AssumeSingleUse);

@@ -667,12 +668,12 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(

DAG.getDataLayout().isLittleEndian()) {

unsigned Scale = NumDstEltBits / NumSrcEltBits;

unsigned NumSrcElts = SrcVT.getVectorNumElements();

- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);

- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);

+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);

+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);

for (unsigned i = 0; i != Scale; ++i) {

unsigned Offset = i * NumSrcEltBits;

APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);

- if (!Sub.isNullValue()) {

+ if (!Sub.isZero()) {

DemandedSrcBits |= Sub;

for (unsigned j = 0; j != NumElts; ++j)

if (DemandedElts[j])

@@ -690,8 +691,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(

DAG.getDataLayout().isLittleEndian()) {

unsigned Scale = NumSrcEltBits / NumDstEltBits;

unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;

- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);

- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);

+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);

+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);

for (unsigned i = 0; i != NumElts; ++i)

if (DemandedElts[i]) {

unsigned Offset = (i % Scale) * NumDstEltBits;

@@ -819,13 +820,21 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(

break;

}

case ISD::INSERT_SUBVECTOR: {

- // If we don't demand the inserted subvector, return the base vector.

SDValue Vec = Op.getOperand(0);

SDValue Sub = Op.getOperand(1);

uint64_t Idx = Op.getConstantOperandVal(2);

unsigned NumSubElts = Sub.getValueType().getVectorNumElements();

- if (DemandedElts.extractBits(NumSubElts, Idx) == 0)

+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);

+ // If we don't demand the inserted subvector, return the base vector.

+ if (DemandedSubElts == 0)

return Vec;

+ // If this simply widens the lowest subvector, see if we can do it earlier.

+ if (Idx == 0 && Vec.isUndef()) {

+ if (SDValue NewSub = SimplifyMultipleUseDemandedBits(

+ Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1))

+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),

+ Op.getOperand(0), NewSub, Op.getOperand(2));

+ }

break;

}

case ISD::VECTOR_SHUFFLE: {

@@ -866,7 +875,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(

unsigned Depth) const {

EVT VT = Op.getValueType();

APInt DemandedElts = VT.isVector()

- ? APInt::getAllOnesValue(VT.getVectorNumElements())

+ ? APInt::getAllOnes(VT.getVectorNumElements())

: APInt(1, 1);

return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,

Depth);

@@ -875,7 +884,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(

SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(

SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,

unsigned Depth) const {

- APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());

+ APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());

return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,

Depth);

}

@@ -942,8 +951,8 @@ bool TargetLowering::SimplifyDemandedBits(

}

// If this is the root being simplified, allow it to have multiple uses,

// just set the DemandedBits/Elts to all bits.

- DemandedBits = APInt::getAllOnesValue(BitWidth);

- DemandedElts = APInt::getAllOnesValue(NumElts);

+ DemandedBits = APInt::getAllOnes(BitWidth);

+ DemandedElts = APInt::getAllOnes(NumElts);

} else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {

// Not demanding any bits/elts from Op.

return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

@@ -1038,7 +1047,7 @@ bool TargetLowering::SimplifyDemandedBits(

unsigned NumSubElts = Sub.getValueType().getVectorNumElements();

APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);

APInt DemandedSrcElts = DemandedElts;

- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);

+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);

KnownBits KnownSub, KnownSrc;

if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,

@@ -1056,8 +1065,8 @@ bool TargetLowering::SimplifyDemandedBits(

Known = KnownBits::commonBits(Known, KnownSrc);

// Attempt to avoid multi-use src if we don't need anything from it.

- if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() ||

- !DemandedSrcElts.isAllOnesValue()) {

+ if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||

+ !DemandedSrcElts.isAllOnes()) {

SDValue NewSub = SimplifyMultipleUseDemandedBits(

Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);

SDValue NewSrc = SimplifyMultipleUseDemandedBits(

@@ -1086,7 +1095,7 @@ bool TargetLowering::SimplifyDemandedBits(

return true;

// Attempt to avoid multi-use src if we don't need anything from it.

- if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) {

+ if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {

SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(

Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);

if (DemandedSrc) {

@@ -1216,7 +1225,7 @@ bool TargetLowering::SimplifyDemandedBits(

assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

// Attempt to avoid multi-use ops if we don't need anything from them.

- if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {

+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {

SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(

Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);

SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(

@@ -1263,7 +1272,7 @@ bool TargetLowering::SimplifyDemandedBits(

assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

// Attempt to avoid multi-use ops if we don't need anything from them.

- if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {

+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {

SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(

Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);

SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(

@@ -1306,7 +1315,7 @@ bool TargetLowering::SimplifyDemandedBits(

assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

// Attempt to avoid multi-use ops if we don't need anything from them.

- if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {

+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {

SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(

Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);

SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(

@@ -1351,8 +1360,7 @@ bool TargetLowering::SimplifyDemandedBits(

// If the RHS is a constant, see if we can change it. Don't alter a -1

// constant because that's a 'not' op, and that is better for combining

// and codegen.

- if (!C->isAllOnesValue() &&

- DemandedBits.isSubsetOf(C->getAPIntValue())) {

+ if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {

// We're flipping all demanded bits. Flip the undemanded bits too.

SDValue New = TLO.DAG.getNOT(dl, Op0, VT);

return TLO.CombineTo(Op, New);

@@ -1360,7 +1368,7 @@ bool TargetLowering::SimplifyDemandedBits(

}

// If we can't turn this into a 'not', try to shrink the constant.

- if (!C || !C->isAllOnesValue())

+ if (!C || !C->isAllOnes())

if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))

return true;

@@ -1605,7 +1613,7 @@ bool TargetLowering::SimplifyDemandedBits(

// always convert this into a logical shr, even if the shift amount is

// variable. The low bit of the shift cannot be an input sign bit unless

// the shift amount is >= the size of the datatype, which is undefined.

- if (DemandedBits.isOneValue())

+ if (DemandedBits.isOne())

return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));

if (const APInt *SA =

@@ -1655,7 +1663,7 @@ bool TargetLowering::SimplifyDemandedBits(

Known.One.setHighBits(ShAmt);

// Attempt to avoid multi-use ops if we don't need anything from them.

- if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {

+ if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {

SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(

Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);

if (DemandedOp0) {

@@ -1781,7 +1789,7 @@ bool TargetLowering::SimplifyDemandedBits(

// If only 1 bit is demanded, replace with PARITY as long as we're before

// op legalization.

// FIXME: Limit to scalars for now.

- if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector())

+ if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())

return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,

Op.getOperand(0)));

@@ -1795,9 +1803,9 @@ bool TargetLowering::SimplifyDemandedBits(

// If we only care about the highest bit, don't bother shifting right.

if (DemandedBits.isSignMask()) {

- unsigned NumSignBits =

- TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);

- bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;

+ unsigned MinSignedBits =

+ TLO.DAG.ComputeMinSignedBits(Op0, DemandedElts, Depth + 1);

+ bool AlreadySignExtended = ExVTBits >= MinSignedBits;

// However if the input is already sign extended we expect the sign

// extension to be dropped altogether later and do not simplify.

if (!AlreadySignExtended) {

@@ -2071,7 +2079,7 @@ bool TargetLowering::SimplifyDemandedBits(

// Demand the bits from every vector element without a constant index.

unsigned NumSrcElts = SrcEltCnt.getFixedValue();

- APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);

+ APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);

if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))

if (CIdx->getAPIntValue().ult(NumSrcElts))

DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());

@@ -2087,8 +2095,7 @@ bool TargetLowering::SimplifyDemandedBits(

return true;

// Attempt to avoid multi-use ops if we don't need anything from them.

- if (!DemandedSrcBits.isAllOnesValue() ||

- !DemandedSrcElts.isAllOnesValue()) {

+ if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {

if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(

Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {

SDValue NewOp =

@@ -2138,12 +2145,12 @@ bool TargetLowering::SimplifyDemandedBits(

TLO.DAG.getDataLayout().isLittleEndian()) {

unsigned Scale = BitWidth / NumSrcEltBits;

unsigned NumSrcElts = SrcVT.getVectorNumElements();

- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);

- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);

+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);

+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);

for (unsigned i = 0; i != Scale; ++i) {

unsigned Offset = i * NumSrcEltBits;

APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);

- if (!Sub.isNullValue()) {

+ if (!Sub.isZero()) {

DemandedSrcBits |= Sub;

for (unsigned j = 0; j != NumElts; ++j)

if (DemandedElts[j])

@@ -2164,8 +2171,8 @@ bool TargetLowering::SimplifyDemandedBits(

TLO.DAG.getDataLayout().isLittleEndian()) {

unsigned Scale = NumSrcEltBits / BitWidth;

unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;

- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);

- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);

+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);

+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);

for (unsigned i = 0; i != NumElts; ++i)

if (DemandedElts[i]) {

unsigned Offset = (i % Scale) * BitWidth;

@@ -2222,7 +2229,7 @@ bool TargetLowering::SimplifyDemandedBits(

}

// Attempt to avoid multi-use ops if we don't need anything from them.

- if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {

+ if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {

SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(

Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);

SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(

@@ -2245,8 +2252,8 @@ bool TargetLowering::SimplifyDemandedBits(

// is probably not useful (and could be detrimental).

ConstantSDNode *C = isConstOrConstSplat(Op1);

APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);

- if (C && !C->isAllOnesValue() && !C->isOne() &&

- (C->getAPIntValue() | HighMask).isAllOnesValue()) {

+ if (C && !C->isAllOnes() && !C->isOne() &&

+ (C->getAPIntValue() | HighMask).isAllOnes()) {

SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);

// Disable the nsw and nuw flags. We can no longer guarantee that we

// won't wrap after simplification.

@@ -2344,7 +2351,7 @@ static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,

return SDValue();

};

- APInt KnownUndef = APInt::getNullValue(NumElts);

+ APInt KnownUndef = APInt::getZero(NumElts);

for (unsigned i = 0; i != NumElts; ++i) {

// If both inputs for this element are either constant or undef and match

// the element type, compute the constant/undef result for this element of

@@ -2371,7 +2378,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(

unsigned NumElts = DemandedElts.getBitWidth();

assert(VT.isVector() && "Expected vector op");

- KnownUndef = KnownZero = APInt::getNullValue(NumElts);

+ KnownUndef = KnownZero = APInt::getZero(NumElts);

// TODO: For now we assume we know nothing about scalable vectors.

if (VT.isScalableVector())

@@ -2463,17 +2470,13 @@ bool TargetLowering::SimplifyDemandedVectorElts(

return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,

KnownZero, TLO, Depth + 1);

- APInt SrcZero, SrcUndef;

- APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);

+ APInt SrcDemandedElts, SrcZero, SrcUndef;

// Bitcast from 'large element' src vector to 'small element' vector, we

// must demand a source element if any DemandedElt maps to it.

if ((NumElts % NumSrcElts) == 0) {

unsigned Scale = NumElts / NumSrcElts;

- for (unsigned i = 0; i != NumElts; ++i)

- if (DemandedElts[i])

- SrcDemandedElts.setBit(i / Scale);

+ SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);

if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,

TLO, Depth + 1))

return true;

@@ -2483,7 +2486,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(

// TODO - bigendian once we have test coverage.

if (TLO.DAG.getDataLayout().isLittleEndian()) {

unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();

- APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);

+ APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);

for (unsigned i = 0; i != NumElts; ++i)

if (DemandedElts[i]) {

unsigned Ofs = (i % Scale) * EltSizeInBits;

@@ -2513,10 +2516,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(

// of this vector.

if ((NumSrcElts % NumElts) == 0) {

unsigned Scale = NumSrcElts / NumElts;

- for (unsigned i = 0; i != NumElts; ++i)

- if (DemandedElts[i])

- SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);

+ SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);

if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,

TLO, Depth + 1))

return true;

@@ -2525,9 +2525,9 @@ bool TargetLowering::SimplifyDemandedVectorElts(

// the output element will be as well, assuming it was demanded.

for (unsigned i = 0; i != NumElts; ++i) {

if (DemandedElts[i]) {

- if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())

+ if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())

KnownZero.setBit(i);

- if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())

+ if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())

KnownUndef.setBit(i);

}

@@ -2536,7 +2536,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(

}

case ISD::BUILD_VECTOR: {

// Check all elements and simplify any unused elements with UNDEF.

- if (!DemandedElts.isAllOnesValue()) {

+ if (!DemandedElts.isAllOnes()) {

// Don't simplify BROADCASTS.

if (llvm::any_of(Op->op_values(),

[&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {

@@ -2589,7 +2589,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(

unsigned NumSubElts = Sub.getValueType().getVectorNumElements();

APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);

APInt DemandedSrcElts = DemandedElts;

- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);

+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);

APInt SubUndef, SubZero;

if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,

@@ -2609,8 +2609,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(

KnownZero.insertBits(SubZero, Idx);

// Attempt to avoid multi-use ops if we don't need anything from them.

- if (!DemandedSrcElts.isAllOnesValue() ||

- !DemandedSubElts.isAllOnesValue()) {

+ if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {

SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(

Src, DemandedSrcElts, TLO.DAG, Depth + 1);

SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(

@@ -2642,7 +2641,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(

KnownZero = SrcZero.extractBits(NumElts, Idx);

// Attempt to avoid multi-use ops if we don't need anything from them.

- if (!DemandedElts.isAllOnesValue()) {

+ if (!DemandedElts.isAllOnes()) {

SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(

Src, DemandedSrcElts, TLO.DAG, Depth + 1);

if (NewSrc) {

@@ -2810,6 +2809,25 @@ bool TargetLowering::SimplifyDemandedVectorElts(

if (DemandedElts.isSubsetOf(KnownUndef))

return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));

KnownUndef.clearAllBits();

+ // zext - if we just need the bottom element then we can mask:

+ // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.

+ if (DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian() &&

+ Src.getOpcode() == ISD::AND && Op->isOnlyUserOf(Src.getNode()) &&

+ Op.getValueSizeInBits() == Src.getValueSizeInBits()) {

+ SDLoc DL(Op);

+ EVT SrcVT = Src.getValueType();

+ EVT SrcSVT = SrcVT.getScalarType();

+ SmallVector<SDValue> MaskElts;

+ MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));

+ MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));

+ SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);

+ if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(

+ ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {

+ Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);

+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));

+ }

}

break;

}

@@ -2842,7 +2860,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(

// Attempt to avoid multi-use ops if we don't need anything from them.

// TODO - use KnownUndef to relax the demandedelts?

- if (!DemandedElts.isAllOnesValue())

+ if (!DemandedElts.isAllOnes())

if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))

return true;

break;

@@ -2869,7 +2887,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(

// Attempt to avoid multi-use ops if we don't need anything from them.

// TODO - use KnownUndef to relax the demandedelts?

- if (!DemandedElts.isAllOnesValue())

+ if (!DemandedElts.isAllOnes())

if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))

return true;

break;

@@ -2897,7 +2915,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(

// Attempt to avoid multi-use ops if we don't need anything from them.

// TODO - use KnownUndef to relax the demandedelts?

- if (!DemandedElts.isAllOnesValue())

+ if (!DemandedElts.isAllOnes())

if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))

return true;

break;

@@ -2923,7 +2941,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(

return true;

} else {

KnownBits Known;

- APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);

+ APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);

if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,

TLO, Depth, AssumeSingleUse))

return true;

@@ -3111,9 +3129,9 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {

case UndefinedBooleanContent:

return CVal[0];

case ZeroOrOneBooleanContent:

- return CVal.isOneValue();

+ return CVal.isOne();

case ZeroOrNegativeOneBooleanContent:

- return CVal.isAllOnesValue();

+ return CVal.isAllOnes();

}

llvm_unreachable("Invalid boolean contents");

@@ -3140,7 +3158,7 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const {

if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)

return !CN->getAPIntValue()[0];

- return CN->isNullValue();

+ return CN->isZero();

}

bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,

@@ -3156,7 +3174,7 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,

return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));

case TargetLowering::UndefinedBooleanContent:

case TargetLowering::ZeroOrNegativeOneBooleanContent:

- return N->isAllOnesValue() && SExt;

+ return N->isAllOnes() && SExt;

}

llvm_unreachable("Unexpected enumeration.");

}

@@ -3210,7 +3228,7 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,

// Bail out if the compare operand that we want to turn into a zero is

// already a zero (otherwise, infinite loop).

auto *YConst = dyn_cast<ConstantSDNode>(Y);

- if (YConst && YConst->isNullValue())

+ if (YConst && YConst->isZero())

return SDValue();

// Transform this into: ~X & Y == 0.

@@ -3325,7 +3343,7 @@ SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(

EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,

DAGCombinerInfo &DCI, const SDLoc &DL) const {

assert(isConstOrConstSplat(N1C) &&

- isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&

+ isConstOrConstSplat(N1C)->getAPIntValue().isZero() &&

"Should be a comparison with 0.");

assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&

"Valid only for [in]equality comparisons.");

@@ -3548,7 +3566,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,

// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an

// equality comparison, then we're just comparing whether X itself is

// zero.

- if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&

+ if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&

N0.getOperand(0).getOpcode() == ISD::CTLZ &&

isPowerOf2_32(N0.getScalarValueSizeInBits())) {

if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {

@@ -3648,8 +3666,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,

(isConstFalseVal(N1C) ||

isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {

- bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||

- (!N1C->isNullValue() && Cond == ISD::SETNE);

+ bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||

+ (!N1C->isZero() && Cond == ISD::SETNE);

if (!Inverse)

return TopSetCC;

@@ -3800,8 +3818,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,

// Otherwise, make this a use of a zext.

return DAG.getSetCC(dl, VT, ZextOp,

DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);

- } else if ((N1C->isNullValue() || N1C->isOne()) &&

- (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {

+ } else if ((N1C->isZero() || N1C->isOne()) &&

+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {

// SETCC (SETCC), [0|1], [EQ|NE] -> SETCC

if (N0.getOpcode() == ISD::SETCC &&

isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&

@@ -3894,7 +3912,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,

// icmp eq/ne (urem %x, %y), 0

// Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':

// icmp eq/ne %x, 0

- if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&

+ if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&

(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {

KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));

KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));

@@ -3902,6 +3920,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,

return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);

}

+ // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0

+ // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0

+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&

+ N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&

+ N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&

+ N1C && N1C->isAllOnes()) {

+ return DAG.getSetCC(dl, VT, N0.getOperand(0),

+ DAG.getConstant(0, dl, OpVT),

+ Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);

+ }

if (SDValue V =

optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))

return V;

@@ -4001,7 +4030,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,

if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {

// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0

- if (C1.isNullValue())

+ if (C1.isZero())

if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(

VT, N0, N1, Cond, DCI, dl))

return CC;

@@ -4010,8 +4039,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,

// For example, when high 32-bits of i64 X are known clear:

// all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0

// all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1

- bool CmpZero = N1C->getAPIntValue().isNullValue();

- bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue();

+ bool CmpZero = N1C->getAPIntValue().isZero();

+ bool CmpNegOne = N1C->getAPIntValue().isAllOnes();

if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {

// Match or(lo,shl(hi,bw/2)) pattern.

auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {

@@ -4140,7 +4169,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,

N0.getOpcode() == ISD::AND && N0.hasOneUse()) {

if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {

const APInt &AndRHSC = AndRHS->getAPIntValue();

- if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {

+ if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {

unsigned ShiftBits = AndRHSC.countTrailingZeros();

if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {

SDValue Shift =

@@ -4336,7 +4365,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,

// When division is cheap or optimizing for minimum size,

// fall through to DIVREM creation by skipping this fold.

- if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {

+ if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {

if (N0.getOpcode() == ISD::UREM) {

if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))

return Folded;

@@ -4687,7 +4716,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL,

getSimpleValueType(DL, STy->getElementType(ResNo));

} else {

assert(ResNo == 0 && "Asm only has one result!");

- OpInfo.ConstraintVT = getSimpleValueType(DL, Call.getType());

+ OpInfo.ConstraintVT =

+ getAsmOperandValueType(DL, Call.getType()).getSimpleVT();

}

++ResNo;

break;

@@ -5049,7 +5079,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,

SmallVector<SDValue, 16> Shifts, Factors;

auto BuildSDIVPattern = [&](ConstantSDNode *C) {

- if (C->isNullValue())

+ if (C->isZero())

return false;

APInt Divisor = C->getAPIntValue();

unsigned Shift = Divisor.countTrailingZeros();

@@ -5151,31 +5181,31 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,

SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

auto BuildSDIVPattern = [&](ConstantSDNode *C) {

- if (C->isNullValue())

+ if (C->isZero())

return false;

const APInt &Divisor = C->getAPIntValue();

- APInt::ms magics = Divisor.magic();

+ SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);

int NumeratorFactor = 0;

int ShiftMask = -1;

- if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {

+ if (Divisor.isOne() || Divisor.isAllOnes()) {

// If d is +1/-1, we just multiply the numerator by +1/-1.

NumeratorFactor = Divisor.getSExtValue();

- magics.m = 0;

- magics.s = 0;

+ magics.Magic = 0;

+ magics.ShiftAmount = 0;

ShiftMask = 0;

- } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {

+ } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {

// If d > 0 and m < 0, add the numerator.

NumeratorFactor = 1;

- } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {

+ } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {

// If d < 0 and m > 0, subtract the numerator.

NumeratorFactor = -1;

}

- MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));

+ MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));

Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));

- Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));

+ Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));

ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));

return true;

};

@@ -5296,33 +5326,33 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,

SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

auto BuildUDIVPattern = [&](ConstantSDNode *C) {

- if (C->isNullValue())

+ if (C->isZero())

return false;

// FIXME: We should use a narrower constant when the upper

// bits are known to be zero.

const APInt& Divisor = C->getAPIntValue();

- APInt::mu magics = Divisor.magicu();

+ UnsignedDivisonByConstantInfo magics = UnsignedDivisonByConstantInfo::get(Divisor);

unsigned PreShift = 0, PostShift = 0;

// If the divisor is even, we can avoid using the expensive fixup by

// shifting the divided value upfront.

- if (magics.a != 0 && !Divisor[0]) {

+ if (magics.IsAdd != 0 && !Divisor[0]) {

PreShift = Divisor.countTrailingZeros();

// Get magic number for the shifted divisor.

- magics = Divisor.lshr(PreShift).magicu(PreShift);

- assert(magics.a == 0 && "Should use cheap fixup now");

+ magics = UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift);

+ assert(magics.IsAdd == 0 && "Should use cheap fixup now");

}

- APInt Magic = magics.m;

+ APInt Magic = magics.Magic;

unsigned SelNPQ;

- if (magics.a == 0 || Divisor.isOneValue()) {

- assert(magics.s < Divisor.getBitWidth() &&

+ if (magics.IsAdd == 0 || Divisor.isOne()) {

+ assert(magics.ShiftAmount < Divisor.getBitWidth() &&

"We shouldn't generate an undefined shift!");

- PostShift = magics.s;

+ PostShift = magics.ShiftAmount;

SelNPQ = false;

} else {

- PostShift = magics.s - 1;

+ PostShift = magics.ShiftAmount - 1;

SelNPQ = true;

}

@@ -5330,7 +5360,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,

MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));

NPQFactors.push_back(

DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)

- : APInt::getNullValue(EltBits),

+ : APInt::getZero(EltBits),

dl, SVT));

PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));

UseNPQ |= SelNPQ;

@@ -5510,13 +5540,13 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,

auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {

// Division by 0 is UB. Leave it to be constant-folded elsewhere.

- if (CDiv->isNullValue())

+ if (CDiv->isZero())

return false;

const APInt &D = CDiv->getAPIntValue();

const APInt &Cmp = CCmp->getAPIntValue();

- ComparingWithAllZeros &= Cmp.isNullValue();

+ ComparingWithAllZeros &= Cmp.isZero();

// x u% C1` is *always* less than C1. So given `x u% C1 == C2`,

// if C2 is not less than C1, the comparison is always false.

@@ -5528,26 +5558,26 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,

// If all lanes are tautological (either all divisors are ones, or divisor

// is not greater than the constant we are comparing with),

// we will prefer to avoid the fold.

- bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;

+ bool TautologicalLane = D.isOne() || TautologicalInvertedLane;

HadTautologicalLanes |= TautologicalLane;

AllLanesAreTautological &= TautologicalLane;

// If we are comparing with non-zero, we need'll need to subtract said

// comparison value from the LHS. But there is no point in doing that if

// every lane where we are comparing with non-zero is tautological..

- if (!Cmp.isNullValue())

+ if (!Cmp.isZero())

AllComparisonsWithNonZerosAreTautological &= TautologicalLane;

// Decompose D into D0 * 2^K

unsigned K = D.countTrailingZeros();

- assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");

+ assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");

APInt D0 = D.lshr(K);

// D is even if it has trailing zeros.

HadEvenDivisor |= (K != 0);

// D is a power-of-two if D0 is one.

// If all divisors are power-of-two, we will prefer to avoid the fold.

- AllDivisorsArePowerOfTwo &= D0.isOneValue();

+ AllDivisorsArePowerOfTwo &= D0.isOne();

// P = inv(D0, 2^W)

// 2^W requires W + 1 bits, so we have to extend and then truncate.

@@ -5555,20 +5585,20 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,

APInt P = D0.zext(W + 1)

.multiplicativeInverse(APInt::getSignedMinValue(W + 1))

.trunc(W);

- assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable

- assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");

+ assert(!P.isZero() && "No multiplicative inverse!"); // unreachable

+ assert((D0 * P).isOne() && "Multiplicative inverse sanity check.");

// Q = floor((2^W - 1) u/ D)

// R = ((2^W - 1) u% D)

APInt Q, R;

- APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);

+ APInt::udivrem(APInt::getAllOnes(W), D, Q, R);

// If we are comparing with zero, then that comparison constant is okay,

// else it may need to be one less than that.

if (Cmp.ugt(R))

Q -= 1;

- assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&

+ assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&

"We are expecting that K is always less than all-ones for ShSVT");

// If the lane is tautological the result can be constant-folded.

@@ -5751,7 +5781,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,

// TODO: Could support comparing with non-zero too.

ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);

- if (!CompTarget || !CompTarget->isNullValue())

+ if (!CompTarget || !CompTarget->isZero())

return SDValue();

bool HadIntMinDivisor = false;

@@ -5764,7 +5794,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,

auto BuildSREMPattern = [&](ConstantSDNode *C) {

// Division by 0 is UB. Leave it to be constant-folded elsewhere.

- if (C->isNullValue())

+ if (C->isZero())

return false;

// FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.

@@ -5777,12 +5807,12 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,

HadIntMinDivisor |= D.isMinSignedValue();

// If all divisors are ones, we will prefer to avoid the fold.

- HadOneDivisor |= D.isOneValue();

- AllDivisorsAreOnes &= D.isOneValue();

+ HadOneDivisor |= D.isOne();

+ AllDivisorsAreOnes &= D.isOne();

// Decompose D into D0 * 2^K

unsigned K = D.countTrailingZeros();

- assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");

+ assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");

APInt D0 = D.lshr(K);

if (!D.isMinSignedValue()) {

@@ -5793,7 +5823,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,

// D is a power-of-two if D0 is one. This includes INT_MIN.

// If all divisors are power-of-two, we will prefer to avoid the fold.

- AllDivisorsArePowerOfTwo &= D0.isOneValue();

+ AllDivisorsArePowerOfTwo &= D0.isOne();

// P = inv(D0, 2^W)

// 2^W requires W + 1 bits, so we have to extend and then truncate.

@@ -5801,8 +5831,8 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,

APInt P = D0.zext(W + 1)

.multiplicativeInverse(APInt::getSignedMinValue(W + 1))

.trunc(W);

- assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable

- assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");

+ assert(!P.isZero() && "No multiplicative inverse!"); // unreachable

+ assert((D0 * P).isOne() && "Multiplicative inverse sanity check.");

// A = floor((2^(W - 1) - 1) / D0) & -2^K

APInt A = APInt::getSignedMaxValue(W).udiv(D0);

@@ -5817,14 +5847,14 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,

// Q = floor((2 * A) / (2^K))

APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));

- assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&

+ assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&

"We are expecting that A is always less than all-ones for SVT");

- assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&

+ assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&

"We are expecting that K is always less than all-ones for ShSVT");

// If the divisor is 1 the result can be constant-folded. Likewise, we

// don't care about INT_MIN lanes, those can be set to undef if appropriate.

- if (D.isOneValue()) {

+ if (D.isOne()) {

// Set P, A and K to a bogus values so we can try to splat them.

P = 0;

A = -1;

@@ -5950,7 +5980,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,

SDValue IntMax = DAG.getConstant(

APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);

SDValue Zero =

- DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);

+ DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);

// Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.

SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);

@@ -6776,7 +6806,7 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,

// the destination signmask can't be represented by the float, so we can

// just use FP_TO_SINT directly.

const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);

- APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));

+ APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));

APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());

if (APFloat::opOverflow &

APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {

@@ -6969,8 +6999,18 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,

return SDValue();

}

-bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,

- SelectionDAG &DAG) const {

+// Only expand vector types if we have the appropriate vector bit operations.

+static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {

+ assert(VT.isVector() && "Expected vector type");

+ unsigned Len = VT.getScalarSizeInBits();

+ return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&

+ TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&

+ TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&

+ (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&

+ TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);

+SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {

SDLoc dl(Node);

EVT VT = Node->getValueType(0);

EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());

@@ -6980,15 +7020,11 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,

// TODO: Add support for irregular type lengths.

if (!(Len <= 128 && Len % 8 == 0))

- return false;

+ return SDValue();

// Only expand vector types if we have the appropriate vector bit operations.

- if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||

- !isOperationLegalOrCustom(ISD::SUB, VT) ||

- !isOperationLegalOrCustom(ISD::SRL, VT) ||

- (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||

- !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))

- return false;

+ if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))

+ return SDValue();

// This is the "best" algorithm from

// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel

@@ -7025,12 +7061,10 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,

DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),

DAG.getConstant(Len - 8, dl, ShVT));

- Result = Op;

- return true;

+ return Op;

}

-bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,

- SelectionDAG &DAG) const {

+SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {

SDLoc dl(Node);

EVT VT = Node->getValueType(0);

EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());

@@ -7039,10 +7073,8 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,

// If the non-ZERO_UNDEF version is supported we can use that instead.

if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&

- isOperationLegalOrCustom(ISD::CTLZ, VT)) {

- Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);

- return true;

- }

+ isOperationLegalOrCustom(ISD::CTLZ, VT))

+ return DAG.getNode(ISD::CTLZ, dl, VT, Op);

// If the ZERO_UNDEF version is supported use that and handle the zero case.

if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {

@@ -7051,17 +7083,18 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,

SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);

SDValue Zero = DAG.getConstant(0, dl, VT);

SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);

- Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,

+ return DAG.getSelect(dl, VT, SrcIsZero,

DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);

- return true;

}

// Only expand vector types if we have the appropriate vector bit operations.

+ // This includes the operations needed to expand CTPOP if it isn't supported.

if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||

- !isOperationLegalOrCustom(ISD::CTPOP, VT) ||

+ (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&

+ !canExpandVectorCTPOP(*this, VT)) ||

!isOperationLegalOrCustom(ISD::SRL, VT) ||

!isOperationLegalOrCustomOrPromote(ISD::OR, VT)))

- return false;

+ return SDValue();

// for now, we do this:

// x = x | (x >> 1);

@@ -7078,12 +7111,10 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,

DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));

}

Op = DAG.getNOT(dl, Op, VT);

- Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);

- return true;

+ return DAG.getNode(ISD::CTPOP, dl, VT, Op);

}

-bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,

- SelectionDAG &DAG) const {

+SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {

SDLoc dl(Node);

EVT VT = Node->getValueType(0);

SDValue Op = Node->getOperand(0);

@@ -7091,10 +7122,8 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,

// If the non-ZERO_UNDEF version is supported we can use that instead.

if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&

- isOperationLegalOrCustom(ISD::CTTZ, VT)) {

- Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);

- return true;

- }

+ isOperationLegalOrCustom(ISD::CTTZ, VT))

+ return DAG.getNode(ISD::CTTZ, dl, VT, Op);

// If the ZERO_UNDEF version is supported use that and handle the zero case.

if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {

@@ -7103,19 +7132,20 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,

SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);

SDValue Zero = DAG.getConstant(0, dl, VT);

SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);

- Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,

+ return DAG.getSelect(dl, VT, SrcIsZero,

DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);

- return true;

}

// Only expand vector types if we have the appropriate vector bit operations.

+ // This includes the operations needed to expand CTPOP if it isn't supported.

if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||

(!isOperationLegalOrCustom(ISD::CTPOP, VT) &&

- !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||

+ !isOperationLegalOrCustom(ISD::CTLZ, VT) &&

+ !canExpandVectorCTPOP(*this, VT)) ||

!isOperationLegalOrCustom(ISD::SUB, VT) ||

!isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||

!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))

- return false;

+ return SDValue();

// for now, we use: { return popcount(~x & (x - 1)); }

// unless the target has ctlz but not ctpop, in which case we use:

@@ -7127,18 +7157,15 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,

// If ISD::CTLZ is legal and CTPOP isn't, then do that instead.

if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {

- Result =

- DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),

- DAG.getNode(ISD::CTLZ, dl, VT, Tmp));

- return true;

+ return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),

+ DAG.getNode(ISD::CTLZ, dl, VT, Tmp));

}

- Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);

- return true;

+ return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);

}

-bool TargetLowering::expandABS(SDNode *N, SDValue &Result,

- SelectionDAG &DAG, bool IsNegative) const {

+SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,

+ bool IsNegative) const {

SDLoc dl(N);

EVT VT = N->getValueType(0);

EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());

@@ -7148,27 +7175,24 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result,

if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&

isOperationLegal(ISD::SMAX, VT)) {

SDValue Zero = DAG.getConstant(0, dl, VT);

- Result = DAG.getNode(ISD::SMAX, dl, VT, Op,

- DAG.getNode(ISD::SUB, dl, VT, Zero, Op));

- return true;

+ return DAG.getNode(ISD::SMAX, dl, VT, Op,

+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));

}

// abs(x) -> umin(x,sub(0,x))

if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&

isOperationLegal(ISD::UMIN, VT)) {

SDValue Zero = DAG.getConstant(0, dl, VT);

- Result = DAG.getNode(ISD::UMIN, dl, VT, Op,

- DAG.getNode(ISD::SUB, dl, VT, Zero, Op));

- return true;

+ return DAG.getNode(ISD::UMIN, dl, VT, Op,

+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));

}

// 0 - abs(x) -> smin(x, sub(0,x))

if (IsNegative && isOperationLegal(ISD::SUB, VT) &&

isOperationLegal(ISD::SMIN, VT)) {

SDValue Zero = DAG.getConstant(0, dl, VT);

- Result = DAG.getNode(ISD::SMIN, dl, VT, Op,

- DAG.getNode(ISD::SUB, dl, VT, Zero, Op));

- return true;

+ return DAG.getNode(ISD::SMIN, dl, VT, Op,

+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));

}

// Only expand vector types if we have the appropriate vector operations.

@@ -7177,20 +7201,19 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result,

(!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||

(IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||

!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))

- return false;

+ return SDValue();

SDValue Shift =

DAG.getNode(ISD::SRA, dl, VT, Op,

DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));

if (!IsNegative) {

SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);

- Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);

- } else {

- // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))

- SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);

- Result = DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);

+ return DAG.getNode(ISD::XOR, dl, VT, Add, Shift);

}

- return true;

+ // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))

+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);

+ return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);

}

SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {

@@ -7265,34 +7288,31 @@ SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {

// TODO: We can easily support i4/i2 legal types if any target ever does.

if (Sz >= 8 && isPowerOf2_32(Sz)) {

// Create the masks - repeating the pattern every byte.

- APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0));

- APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC));

- APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA));

- APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F));

- APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33));

- APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55));

+ APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));

+ APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));

+ APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));

// BSWAP if the type is wider than a single byte.

Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);

- // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4)

- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT));

- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT));

- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT));

+ // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)

+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));

+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));

+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));

Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));

Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

- // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2)

- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT));

- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT));

- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT));

+ // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)

+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));

+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));

+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));

Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));

Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

- // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1)

- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT));

- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT));

- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT));

+ // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)

+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));

+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));

+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));

Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));

Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

return Tmp;

@@ -7802,13 +7822,15 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,

static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,

EVT VecVT, const SDLoc &dl,

- unsigned NumSubElts) {

- if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx))

- return Idx;

+ ElementCount SubEC) {

+ assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&

+ "Cannot index a scalable vector within a fixed-width vector");

- EVT IdxVT = Idx.getValueType();

unsigned NElts = VecVT.getVectorMinNumElements();

- if (VecVT.isScalableVector()) {

+ unsigned NumSubElts = SubEC.getKnownMinValue();

+ EVT IdxVT = Idx.getValueType();

+ if (VecVT.isScalableVector() && !SubEC.isScalable()) {

// If this is a constant index and we know the value plus the number of the

// elements in the subvector minus one is less than the minimum number of

// elements then it's safe to return Idx.

@@ -7855,16 +7877,16 @@ SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,

unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.

assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&

"Converting bits to bytes lost precision");

- // Scalable vectors don't need clamping as these are checked at compile time

- if (SubVecVT.isFixedLengthVector()) {

- assert(SubVecVT.getVectorElementType() == EltVT &&

- "Sub-vector must be a fixed vector with matching element type");

- Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,

- SubVecVT.getVectorNumElements());

- }

+ assert(SubVecVT.getVectorElementType() == EltVT &&

+ "Sub-vector must be a vector with matching element type");

+ Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,

+ SubVecVT.getVectorElementCount());

EVT IdxVT = Index.getValueType();

+ if (SubVecVT.isScalableVector())

+ Index =

+ DAG.getNode(ISD::MUL, dl, IdxVT, Index,

+ DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));

Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,

DAG.getConstant(EltSize, dl, IdxVT));

@@ -7920,7 +7942,7 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,

ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

SDLoc dl(Op);

if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {

- if (C->isNullValue() && CC == ISD::SETEQ) {

+ if (C->isZero() && CC == ISD::SETEQ) {

EVT VT = Op.getOperand(0).getValueType();

SDValue Zext = Op.getOperand(0);

if (VT.bitsLT(MVT::i32)) {

@@ -7948,10 +7970,8 @@ TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT,

(IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED);

// Scaling is unimportant for bytes, canonicalize to unscaled.

- if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) {

- IsScaledIndex = false;

- IndexType = IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;

- }

+ if (IsScaledIndex && MemVT.getScalarType() == MVT::i8)

+ return IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;

return IndexType;

}

@@ -8072,14 +8092,12 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {

return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);

}

- // SatMax -> Overflow && SumDiff < 0

- // SatMin -> Overflow && SumDiff >= 0

+ // Overflow ? (SumDiff >> (BW - 1)) ^ MinVal : SumDiff

APInt MinVal = APInt::getSignedMinValue(BitWidth);

- APInt MaxVal = APInt::getSignedMaxValue(BitWidth);

SDValue SatMin = DAG.getConstant(MinVal, dl, VT);

- SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);

- SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);

- Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);

+ SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,

+ DAG.getConstant(BitWidth - 1, dl, VT));

+ Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);

return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);

}

@@ -8154,8 +8172,11 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {

APInt MaxVal = APInt::getSignedMaxValue(VTSize);

SDValue SatMin = DAG.getConstant(MinVal, dl, VT);

SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);

- SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);

- Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);

+ // Xor the inputs, if resulting sign bit is 0 the product will be

+ // positive, else negative.

+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);

+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);

+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);

return DAG.getSelect(dl, VT, Overflow, Result, Product);

} else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {

SDValue Result =

@@ -8390,7 +8411,7 @@ void TargetLowering::expandSADDSUBO(

// If SADDSAT/SSUBSAT is legal, compare results to detect overflow.

unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;

- if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {

+ if (isOperationLegal(OpcSat, LHS.getValueType())) {

SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);

SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);

Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);

@@ -8443,8 +8464,8 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,

EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);

if (VT.isVector())

- WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,

- VT.getVectorNumElements());

+ WideVT =

+ EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());

SDValue BottomHalf;

SDValue TopHalf;