author     Dimitry Andric <dim@FreeBSD.org>  2019-10-23 17:51:42 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2019-10-23 17:51:42 +0000
commit     1d5ae1026e831016fc29fd927877c86af904481f (patch)
tree       2cdfd12620fcfa5d9e4a0389f85368e8e36f63f9 /lib/CodeGen/SelectionDAG
parent     e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff)
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
25 files changed, 3619 insertions, 2119 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 49c922f560fa..e8950b58d42d 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -24,7 +24,6 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -111,10 +110,20 @@ static cl::opt<bool>
 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                   cl::desc("DAG combiner may split indexing from loads"));
 
+static cl::opt<bool>
+    EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
+                       cl::desc("DAG combiner enable merging multiple stores "
+                                "into a wider store"));
+
 static cl::opt<unsigned> TokenFactorInlineLimit(
     "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
     cl::desc("Limit the number of operands to inline for Token Factors"));
 
+static cl::opt<unsigned> StoreMergeDependenceLimit(
+    "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
+    cl::desc("Limit the number of times for the same StoreNode and RootNode "
+             "to bail out in store merging dependence check"));
+
 namespace {
 
   class DAGCombiner {
@@ -152,6 +161,14 @@ namespace {
     /// which have not yet been combined to the worklist.
     SmallPtrSet<SDNode *, 32> CombinedNodes;
 
+    /// Map from candidate StoreNode to the pair of RootNode and count.
+    /// The count is used to track how many times we have seen the StoreNode
+    /// with the same RootNode bail out in dependence check. If we have seen
+    /// the bail out for the same pair many times over a limit, we won't
+    /// consider the StoreNode with the same RootNode as store merging
+    /// candidate again.
+    DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
+
     // AA - Used for DAG load/store alias analysis.
     AliasAnalysis *AA;
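A note on the new StoreRootCountMap: the bookkeeping amounts to a per-store bail-out counter keyed by the current merge root. A minimal standalone sketch of that pattern (illustrative only, with hypothetical stand-in types; the real code keys a DenseMap by SDNode* and reads the limit from the cl::opt above):

#include <cassert>
#include <map>
#include <utility>

using Node = int; // hypothetical stand-in for SDNode*

std::map<Node, std::pair<Node, unsigned>> StoreRootCountMap;
const unsigned StoreMergeDependenceLimit = 10;

// Returns false once the same (Store, Root) pair has bailed out too often.
bool shouldAttemptMerge(Node Store, Node Root) {
  auto It = StoreRootCountMap.find(Store);
  if (It != StoreRootCountMap.end() && It->second.first == Root &&
      It->second.second >= StoreMergeDependenceLimit)
    return false; // This pair keeps failing the dependence check; give up.
  return true;
}

// Called when the dependence check fails for (Store, Root).
void recordBailOut(Node Store, Node Root) {
  auto &Entry = StoreRootCountMap[Store];
  if (Entry.first != Root)
    Entry = {Root, 1}; // New root: reset the counter.
  else
    ++Entry.second;
}

int main() {
  for (int I = 0; I < 10; ++I)
    recordBailOut(1, 2);
  assert(!shouldAttemptMerge(1, 2)); // hit the limit for this pair
  assert(shouldAttemptMerge(1, 3));  // a different root is still allowed
}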
 
@@ -236,6 +253,7 @@ namespace {
     void removeFromWorklist(SDNode *N) {
       CombinedNodes.erase(N);
       PruningList.remove(N);
+      StoreRootCountMap.erase(N);
 
       auto It = WorklistMap.find(N);
       if (It == WorklistMap.end())
@@ -361,6 +379,7 @@ namespace {
     SDValue visitSUBE(SDNode *N);
     SDValue visitSUBCARRY(SDNode *N);
     SDValue visitMUL(SDNode *N);
+    SDValue visitMULFIX(SDNode *N);
     SDValue useDivRem(SDNode *N);
     SDValue visitSDIV(SDNode *N);
     SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
@@ -421,7 +440,6 @@ namespace {
     SDValue visitFP_TO_SINT(SDNode *N);
     SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
-    SDValue visitFP_ROUND_INREG(SDNode *N);
     SDValue visitFP_EXTEND(SDNode *N);
     SDValue visitFNEG(SDNode *N);
     SDValue visitFABS(SDNode *N);
@@ -470,7 +488,7 @@ namespace {
     SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                            SDValue N1, SDNodeFlags Flags);
 
-    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
+    SDValue visitShiftByConstant(SDNode *N);
 
     SDValue foldSelectOfConstants(SDNode *N);
     SDValue foldVSelectOfConstants(SDNode *N);
@@ -497,6 +515,7 @@ namespace {
     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC) const;
     bool isOneUseSetCC(SDValue N) const;
+    bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y);
 
     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                        unsigned HiOp);
@@ -510,7 +529,7 @@ namespace {
     SDValue BuildSDIVPow2(SDNode *N);
     SDValue BuildUDIV(SDNode *N);
     SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
-    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
+    SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
@@ -521,11 +540,11 @@ namespace {
     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                bool DemandHighBits = true);
     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
-    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
+    SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                               SDValue InnerPos, SDValue InnerNeg,
                               unsigned PosOpcode, unsigned NegOpcode,
                               const SDLoc &DL);
-    SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
+    SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
     SDValue MatchLoadCombine(SDNode *N);
     SDValue MatchStoreCombine(StoreSDNode *N);
     SDValue ReduceLoadWidth(SDNode *N);
@@ -742,6 +761,11 @@ CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
 }
 
+bool TargetLowering::DAGCombinerInfo::
+recursivelyDeleteUnusedNodes(SDNode *N) {
+  return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
+}
+
 void TargetLowering::DAGCombinerInfo::
 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
@@ -766,195 +790,6 @@ void DAGCombiner::deleteAndRecombine(SDNode *N) {
   DAG.DeleteNode(N);
 }
 
-/// Return 1 if we can compute the negated form of the specified expression for
-/// the same cost as the expression itself, or 2 if we can compute the negated
-/// form more cheaply than the expression itself.
-static char isNegatibleForFree(SDValue Op, bool LegalOperations,
-                               const TargetLowering &TLI,
-                               const TargetOptions *Options,
-                               bool ForCodeSize,
-                               unsigned Depth = 0) {
-  // fneg is removable even if it has multiple uses.
-  if (Op.getOpcode() == ISD::FNEG)
-    return 2;
-
-  // Don't allow anything with multiple uses unless we know it is free.
-  EVT VT = Op.getValueType();
-  const SDNodeFlags Flags = Op->getFlags();
-  if (!Op.hasOneUse() &&
-      !(Op.getOpcode() == ISD::FP_EXTEND &&
-        TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
-    return 0;
-
-  // Don't recurse exponentially.
-  if (Depth > 6)
-    return 0;
-
-  switch (Op.getOpcode()) {
-  default: return false;
-  case ISD::ConstantFP: {
-    if (!LegalOperations)
-      return 1;
-
-    // Don't invert constant FP values after legalization unless the target
-    // says the negated constant is legal.
-    return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
-           TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
-                            ForCodeSize);
-  }
-  case ISD::BUILD_VECTOR: {
-    // Only permit BUILD_VECTOR of constants.
-    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
-          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
-        }))
-      return 0;
-    if (!LegalOperations)
-      return 1;
-    if (TLI.isOperationLegal(ISD::ConstantFP, VT) &&
-        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
-      return 1;
-    return llvm::all_of(Op->op_values(), [&](SDValue N) {
-      return N.isUndef() ||
-             TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()),
-                              VT, ForCodeSize);
-    });
-  }
-  case ISD::FADD:
-    if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
-      return 0;
-
-    // After operation legalization, it might not be legal to create new FSUBs.
-    if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
-      return 0;
-
-    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
-    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
-                                    Options, ForCodeSize, Depth + 1))
-      return V;
-    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
-    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
-                              ForCodeSize, Depth + 1);
-  case ISD::FSUB:
-    // We can't turn -(A-B) into B-A when we honor signed zeros.
-    if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
-      return 0;
-
-    // fold (fneg (fsub A, B)) -> (fsub B, A)
-    return 1;
-
-  case ISD::FMUL:
-  case ISD::FDIV:
-    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
-    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
-                                    Options, ForCodeSize, Depth + 1))
-      return V;
-
-    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
-                              ForCodeSize, Depth + 1);
-
-  case ISD::FP_EXTEND:
-  case ISD::FP_ROUND:
-  case ISD::FSIN:
-    return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
-                              ForCodeSize, Depth + 1);
-  }
-}
-
-/// If isNegatibleForFree returns true, return the newly negated expression.
-static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
-                                    bool LegalOperations, bool ForCodeSize,
-                                    unsigned Depth = 0) {
-  // fneg is removable even if it has multiple uses.
-  if (Op.getOpcode() == ISD::FNEG)
-    return Op.getOperand(0);
-
-  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
-  const TargetOptions &Options = DAG.getTarget().Options;
-  const SDNodeFlags Flags = Op->getFlags();
-
-  switch (Op.getOpcode()) {
-  default: llvm_unreachable("Unknown code");
-  case ISD::ConstantFP: {
-    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
-    V.changeSign();
-    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
-  }
-  case ISD::BUILD_VECTOR: {
-    SmallVector<SDValue, 4> Ops;
-    for (SDValue C : Op->op_values()) {
-      if (C.isUndef()) {
-        Ops.push_back(C);
-        continue;
-      }
-      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
-      V.changeSign();
-      Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
-    }
-    return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
-  }
-  case ISD::FADD:
-    assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
-
-    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
-    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
-                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
-                           Depth + 1))
-      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
-                         GetNegatedExpression(Op.getOperand(0), DAG,
-                                              LegalOperations, ForCodeSize,
-                                              Depth + 1),
-                         Op.getOperand(1), Flags);
-    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
-    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
-                       GetNegatedExpression(Op.getOperand(1), DAG,
-                                            LegalOperations, ForCodeSize,
-                                            Depth + 1),
-                       Op.getOperand(0), Flags);
-  case ISD::FSUB:
-    // fold (fneg (fsub 0, B)) -> B
-    if (ConstantFPSDNode *N0CFP =
-            isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
-      if (N0CFP->isZero())
-        return Op.getOperand(1);
-
-    // fold (fneg (fsub A, B)) -> (fsub B, A)
-    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
-                       Op.getOperand(1), Op.getOperand(0), Flags);
-
-  case ISD::FMUL:
-  case ISD::FDIV:
-    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
-    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
-                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
-                           Depth + 1))
-      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
-                         GetNegatedExpression(Op.getOperand(0), DAG,
-                                              LegalOperations, ForCodeSize,
-                                              Depth + 1),
-                         Op.getOperand(1), Flags);
-
-    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
-    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
-                       Op.getOperand(0),
-                       GetNegatedExpression(Op.getOperand(1), DAG,
-                                            LegalOperations, ForCodeSize,
-                                            Depth + 1), Flags);
-
-  case ISD::FP_EXTEND:
-  case ISD::FSIN:
-    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
-                       GetNegatedExpression(Op.getOperand(0), DAG,
-                                            LegalOperations, ForCodeSize,
-                                            Depth + 1));
-  case ISD::FP_ROUND:
-    return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
-                       GetNegatedExpression(Op.getOperand(0), DAG,
-                                            LegalOperations, ForCodeSize,
-                                            Depth + 1),
-                       Op.getOperand(1));
-  }
-}
-
 // APInts must be the same size for most operations, this helper
 // function zero extends the shorter of the pair so that they match.
 // We provide an Offset so that we can create bitwidths that won't overflow.
@@ -1124,7 +959,6 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
       SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
       if (!OpNode.getNode())
         return SDValue();
-      AddToWorklist(OpNode.getNode());
       return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
     }
   }
@@ -1438,7 +1272,6 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
 
     SDValue RV =
         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
-    AddToWorklist(N0.getNode());
     if (Replace)
       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
 
@@ -1591,8 +1424,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
 
       for (SDNode *LN : UpdatedNodes) {
-        AddToWorklist(LN);
         AddUsersToWorklist(LN);
+        AddToWorklist(LN);
       }
       if (!NIsValid)
        continue;
@@ -1673,6 +1506,10 @@ SDValue DAGCombiner::visit(SDNode *N) {
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
+  case ISD::SMULFIX:
+  case ISD::SMULFIXSAT:
+  case ISD::UMULFIX:
+  case ISD::UMULFIXSAT:         return visitMULFIX(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
@@ -1736,7 +1573,6 @@ SDValue DAGCombiner::visit(SDNode *N) {
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
-  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
@@ -3308,6 +3144,18 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     }
   }
 
+  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
+    // (sub Carry, X)  ->  (addcarry (sub 0, X), 0, Carry)
+    if (SDValue Carry = getAsCarry(TLI, N0)) {
+      SDValue X = N1;
+      SDValue Zero = DAG.getConstant(0, DL, VT);
+      SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
+      return DAG.getNode(ISD::ADDCARRY, DL,
+                         DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
+                         Carry);
+    }
+  }
+
   return SDValue();
 }
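The identity behind the new (sub Carry, X) fold can be sanity-checked with plain wrapping arithmetic (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  // For Carry in {0,1}: Carry - X == (0 - X) + 0 + Carry (mod 2^32),
  // which is exactly the value the addcarry form computes.
  for (uint32_t Carry = 0; Carry <= 1; ++Carry)
    for (uint32_t X : {0u, 1u, 7u, 0x80000000u, 0xffffffffu})
      assert(Carry - X == (0u - X) + 0u + Carry);
}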
@@ -3442,6 +3290,30 @@ SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
   return SDValue();
 }
 
+// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
+// UMULFIXSAT here.
+SDValue DAGCombiner::visitMULFIX(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue Scale = N->getOperand(2);
+  EVT VT = N0.getValueType();
+
+  // fold (mulfix x, undef, scale) -> 0
+  if (N0.isUndef() || N1.isUndef())
+    return DAG.getConstant(0, SDLoc(N), VT);
+
+  // Canonicalize constant to RHS (vector doesn't have to splat)
+  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
+
+  // fold (mulfix x, 0, scale) -> 0
+  if (isNullConstant(N1))
+    return DAG.getConstant(0, SDLoc(N), VT);
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitMUL(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -3537,7 +3409,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
   // x * 15 --> (x << 4) - x
   // x * -33 --> -((x << 5) + x)
   // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
-  if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
+  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
     // TODO: We could handle more general decomposition of any constant by
     //       having the target set a limit on number of ops and making a
     //       callback to determine that sequence (similar to sqrt expansion).
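The shift-and-add decompositions listed in the comment above are plain modular-arithmetic identities; a quick standalone check (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 1u, 3u, 0x1234u, 0x80000000u, 0xffffffffu}) {
    assert(X * 17u == ((X << 4) + X));          // 17 = 2^4 + 1
    assert(X * 15u == ((X << 4) - X));          // 15 = 2^4 - 1
    assert(X * -33 == 0u - ((X << 5) + X));     // -33 = -(2^5 + 1), wraps
    assert(X * -15 == X - (X << 4));            // -(2^4 - 1) reduces
  }
}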
@@ -4083,10 +3955,10 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
 
   if (VT.isVector()) {
     // fold (mulhs x, 0) -> 0
-    if (ISD::isBuildVectorAllZeros(N1.getNode()))
-      return N1;
-    if (ISD::isBuildVectorAllZeros(N0.getNode()))
-      return N0;
+    // do not return N0/N1, because undef node may exist.
+    if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
+        ISD::isBuildVectorAllZeros(N1.getNode()))
+      return DAG.getConstant(0, DL, VT);
   }
 
   // fold (mulhs x, 0) -> 0
@@ -4095,7 +3967,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
   // fold (mulhs x, 1) -> (sra x, size(x)-1)
   if (isOneConstant(N1))
     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
-                       DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
+                       DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
                                        getShiftAmountTy(N0.getValueType())));
 
   // fold (mulhs x, undef) -> 0
@@ -4130,10 +4002,10 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
 
   if (VT.isVector()) {
     // fold (mulhu x, 0) -> 0
-    if (ISD::isBuildVectorAllZeros(N1.getNode()))
-      return N1;
-    if (ISD::isBuildVectorAllZeros(N0.getNode()))
-      return N0;
+    // do not return N0/N1, because undef node may exist.
+    if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
+        ISD::isBuildVectorAllZeros(N1.getNode()))
+      return DAG.getConstant(0, DL, VT);
   }
 
   // fold (mulhu x, 0) -> 0
@@ -4265,6 +4137,18 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
 
+  // (umul_lohi N0, 0) -> (0, 0)
+  if (isNullConstant(N->getOperand(1))) {
+    SDValue Zero = DAG.getConstant(0, DL, VT);
+    return CombineTo(N, Zero, Zero);
+  }
+
+  // (umul_lohi N0, 1) -> (N0, 0)
+  if (isOneConstant(N->getOperand(1))) {
+    SDValue Zero = DAG.getConstant(0, DL, VT);
+    return CombineTo(N, N->getOperand(0), Zero);
+  }
+
   // If a type twice as wide is legal, transform the mulhu to a wider
   // multiply plus a shift.
   if (VT.isSimple() && !VT.isVector()) {
@@ -4290,13 +4174,29 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
 }
 
 SDValue DAGCombiner::visitMULO(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N0.getValueType();
   bool IsSigned = (ISD::SMULO == N->getOpcode());
 
+  EVT CarryVT = N->getValueType(1);
+  SDLoc DL(N);
+
+  // canonicalize constant to RHS.
+  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
+
+  // fold (mulo x, 0) -> 0 + no carry out
+  if (isNullOrNullSplat(N1))
+    return CombineTo(N, DAG.getConstant(0, DL, VT),
+                     DAG.getConstant(0, DL, CarryVT));
+
   // (mulo x, 2) -> (addo x, x)
-  if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1)))
+  if (ConstantSDNode *C2 = isConstOrConstSplat(N1))
     if (C2->getAPIntValue() == 2)
-      return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N),
-                         N->getVTList(), N->getOperand(0), N->getOperand(0));
+      return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
+                         N->getVTList(), N0, N0);
 
   return SDValue();
 }
@@ -4444,7 +4344,9 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
       Level <= AfterLegalizeTypes) {
     // Input types must be integer and the same.
-    if (XVT.isInteger() && XVT == Y.getValueType()) {
+    if (XVT.isInteger() && XVT == Y.getValueType() &&
+        !(VT.isVector() && TLI.isTypeLegal(VT) &&
+          !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
       return DAG.getNode(HandOpcode, DL, VT, Logic);
     }
@@ -4770,8 +4672,8 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
     return true;
   }
 
-  // Do not change the width of a volatile load.
-  if (LoadN->isVolatile())
+  // Do not change the width of volatile or atomic loads.
+  if (!LoadN->isSimple())
     return false;
 
   // Do not generate loads of non-round integer types since these can
@@ -4803,15 +4705,15 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
   if (!MemVT.isRound())
     return false;
 
-  // Don't change the width of a volatile load.
-  if (LDST->isVolatile())
+  // Don't change the width of volatile or atomic loads.
+  if (!LDST->isSimple())
     return false;
 
   // Verify that we are actually reducing a load width here.
   if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
     return false;
 
-  // Ensure that this isn't going to produce an unsupported unaligned access.
+  // Ensure that this isn't going to produce an unsupported memory access.
   if (ShAmt &&
       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
                               LDST->getAddressSpace(), ShAmt / 8,
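The next hunk adds combineShiftAnd1ToBitTest. The scalar identity it rewrites, and (not (srl X, C)), 1 --> (and X, 1<<C) == 0, checks out in plain C++ (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 1u, 0x5au, 0x80000000u, 0xffffffffu})
    for (unsigned C = 0; C < 32; ++C) {
      uint32_t Lhs = (~(X >> C)) & 1u;                 // shift + not + mask
      uint32_t Rhs = ((X & (1u << C)) == 0) ? 1u : 0u; // mask + compare-to-zero
      assert(Lhs == Rhs);
    }
}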
@@ -5076,6 +4978,59 @@ SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
   return T1;
 }
 
+/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
+/// For a target with a bit test, this is expected to become test + set and
+/// save at least 1 instruction.
+static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
+  assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
+
+  // This is probably not worthwhile without a supported type.
+  EVT VT = And->getValueType(0);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isTypeLegal(VT))
+    return SDValue();
+
+  // Look through an optional extension and find a 'not'.
+  // TODO: Should we favor test+set even without the 'not' op?
+  SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
+  if (Not.getOpcode() == ISD::ANY_EXTEND)
+    Not = Not.getOperand(0);
+  if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
+    return SDValue();
+
+  // Look through an optional truncation. The source operand may not be the
+  // same type as the original 'and', but that is ok because we are masking
+  // off everything but the low bit.
+  SDValue Srl = Not.getOperand(0);
+  if (Srl.getOpcode() == ISD::TRUNCATE)
+    Srl = Srl.getOperand(0);
+
+  // Match a shift-right by constant.
+  if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
+      !isa<ConstantSDNode>(Srl.getOperand(1)))
+    return SDValue();
+
+  // We might have looked through casts that make this transform invalid.
+  // TODO: If the source type is wider than the result type, do the mask and
+  //       compare in the source type.
+  const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
+  unsigned VTBitWidth = VT.getSizeInBits();
+  if (ShiftAmt.uge(VTBitWidth))
+    return SDValue();
+
+  // Turn this into a bit-test pattern using mask op + setcc:
+  // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
+  SDLoc DL(And);
+  SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
+  EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+  SDValue Mask = DAG.getConstant(
+      APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
+  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
+  return DAG.getZExtOrTrunc(Setcc, DL, VT);
+}
+
 SDValue DAGCombiner::visitAND(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -5163,6 +5118,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
       return SDValue(N, 0); // Return N so it doesn't get rechecked!
     }
   }
+
   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
   // already be zero by virtue of the width of the base type of the load.
@@ -5337,7 +5293,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     unsigned MemBitSize = MemVT.getScalarSizeInBits();
     APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
     if (DAG.MaskedValueIsZero(N1, ExtBits) &&
-        ((!LegalOperations && !LN0->isVolatile()) ||
+        ((!LegalOperations && LN0->isSimple()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
       SDValue ExtLoad =
           DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
@@ -5358,6 +5314,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
     return Shifts;
 
+  if (TLI.hasBitTest(N0, N1))
+    if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
+      return V;
+
   return SDValue();
 }
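The hunks below rework matching of the packed-halfword-bswap pattern. The expression in the doc comment is equivalent to rotl(bswap(x), 16), which can be verified directly (helper names bswap32/rotl32 are mine, not from the patch):

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t X) {
  return (X >> 24) | ((X >> 8) & 0xff00u) | ((X << 8) & 0xff0000u) | (X << 24);
}

static uint32_t rotl32(uint32_t X, unsigned S) {
  return (X << S) | (X >> (32 - S)); // valid here for S in 1..31
}

int main() {
  for (uint32_t X : {0u, 0x11223344u, 0xdeadbeefu, 0xffffffffu}) {
    uint32_t Packed = ((X & 0x000000ffu) << 8) | ((X & 0x0000ff00u) >> 8) |
                      ((X & 0x00ff0000u) << 8) | ((X & 0xff000000u) >> 8);
    assert(Packed == rotl32(bswap32(X), 16));
  }
}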
@@ -5564,6 +5524,23 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
   return true;
 }
 
+// Match 2 elements of a packed halfword bswap.
+static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
+  if (N.getOpcode() == ISD::OR)
+    return isBSwapHWordElement(N.getOperand(0), Parts) &&
+           isBSwapHWordElement(N.getOperand(1), Parts);
+
+  if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
+    ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
+    if (!C || C->getAPIntValue() != 16)
+      return false;
+    Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
+    return true;
+  }
+
+  return false;
+}
+
 /// Match a 32-bit packed halfword bswap. That is
 /// ((x & 0x000000ff) << 8) |
 /// ((x & 0x0000ff00) >> 8) |
@@ -5581,43 +5558,26 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
     return SDValue();
 
   // Look for either
-  // (or (or (and), (and)), (or (and), (and)))
-  // (or (or (or (and), (and)), (and)), (and))
-  if (N0.getOpcode() != ISD::OR)
-    return SDValue();
-  SDValue N00 = N0.getOperand(0);
-  SDValue N01 = N0.getOperand(1);
+  // (or (bswaphpair), (bswaphpair))
+  // (or (or (bswaphpair), (and)), (and))
+  // (or (or (and), (bswaphpair)), (and))
   SDNode *Parts[4] = {};
 
-  if (N1.getOpcode() == ISD::OR &&
-      N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
+  if (isBSwapHWordPair(N0, Parts)) {
     // (or (or (and), (and)), (or (and), (and)))
-    if (!isBSwapHWordElement(N00, Parts))
+    if (!isBSwapHWordPair(N1, Parts))
       return SDValue();
-
-    if (!isBSwapHWordElement(N01, Parts))
-      return SDValue();
-    SDValue N10 = N1.getOperand(0);
-    if (!isBSwapHWordElement(N10, Parts))
-      return SDValue();
-    SDValue N11 = N1.getOperand(1);
-    if (!isBSwapHWordElement(N11, Parts))
-      return SDValue();
-  } else {
+  } else if (N0.getOpcode() == ISD::OR) {
     // (or (or (or (and), (and)), (and)), (and))
     if (!isBSwapHWordElement(N1, Parts))
       return SDValue();
-    if (!isBSwapHWordElement(N01, Parts))
-      return SDValue();
-    if (N00.getOpcode() != ISD::OR)
-      return SDValue();
-    SDValue N000 = N00.getOperand(0);
-    if (!isBSwapHWordElement(N000, Parts))
-      return SDValue();
-    SDValue N001 = N00.getOperand(1);
-    if (!isBSwapHWordElement(N001, Parts))
+    SDValue N00 = N0.getOperand(0);
+    SDValue N01 = N0.getOperand(1);
+    if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
+        !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
      return SDValue();
-  }
+  } else
+    return SDValue();
 
   // Make sure the parts are all coming from the same node.
   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
@@ -5791,15 +5751,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
         SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
         SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
 
-        bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
-        if (!LegalMask) {
-          std::swap(NewLHS, NewRHS);
-          ShuffleVectorSDNode::commuteMask(Mask);
-          LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
-        }
-
-        if (LegalMask)
-          return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
+        SDValue LegalShuffle =
+            TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
+                                        Mask, DAG);
+        if (LegalShuffle)
+          return LegalShuffle;
       }
     }
   }
@@ -5867,8 +5823,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
     return V;
 
   // See if this is some rotate idiom.
-  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
-    return SDValue(Rot, 0);
+  if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
+    return Rot;
 
   if (SDValue Load = MatchLoadCombine(N))
     return Load;
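The next hunks teach extractShiftForRotate about (add v v). That case relies on add v v == shl v 1, so or-ing it with srl v, bitwidth-1 is a rotate-left-by-1; a quick check (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t V : {0u, 1u, 0x80000001u, 0xdeadbeefu, 0xffffffffu}) {
    uint32_t Rotl1 = (V << 1) | (V >> 31);
    assert(((V + V) | (V >> 31)) == Rotl1); // add-to-self == shl by 1
  }
}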
@@ -5914,6 +5870,9 @@ static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
 /// patterns:
 ///
+/// (or (add v v) (shrl v bitwidth-1)):
+///   expands (add v v) -> (shl v 1)
+///
 /// (or (mul v c0) (shrl (mul v c1) c2)):
 ///   expands (mul v c0) -> (shl (mul v c1) c3)
 ///
@@ -5936,6 +5895,23 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
          "Existing shift must be valid as a rotate half");
 
   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
+
+  // Value and Type of the shift.
+  SDValue OppShiftLHS = OppShift.getOperand(0);
+  EVT ShiftedVT = OppShiftLHS.getValueType();
+
+  // Amount of the existing shift.
+  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
+
+  // (add v v) -> (shl v 1)
+  if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
+      ExtractFrom.getOpcode() == ISD::ADD &&
+      ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
+      ExtractFrom.getOperand(0) == OppShiftLHS &&
+      OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
+    return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
+                       DAG.getShiftAmountConstant(1, ShiftedVT, DL));
+
   // Preconditions:
   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
   //
@@ -5959,15 +5935,11 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
 
   // op0 must be the same opcode on both sides, have the same LHS argument,
   // and produce the same value type.
-  SDValue OppShiftLHS = OppShift.getOperand(0);
-  EVT ShiftedVT = OppShiftLHS.getValueType();
   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
       ShiftedVT != ExtractFrom.getValueType())
     return SDValue();
 
-  // Amount of the existing shift.
-  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
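For context on the rotate matching below: matchRotateSub accepts shift-amount pairs of the form (S, bitwidth - S), which is exactly the classic rotate idiom (illustrative check; rotl32 is my helper, not from the patch):

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t X, unsigned S) {
  return (X << (S & 31)) | (X >> ((32 - S) & 31));
}

int main() {
  for (uint32_t X : {1u, 0x12345678u, 0xffffffffu})
    for (unsigned S = 1; S < 32; ++S)
      assert(((X << S) | (X >> (32 - S))) == rotl32(X, S));
}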
@@ -6137,7 +6109,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
 // former being preferred if supported. InnerPos and InnerNeg are Pos and
 // Neg with outer conversions stripped away.
-SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
+SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
                                        SDValue Neg, SDValue InnerPos,
                                        SDValue InnerNeg, unsigned PosOpcode,
                                        unsigned NegOpcode, const SDLoc &DL) {
@@ -6152,32 +6124,33 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
-                       HasPos ? Pos : Neg).getNode();
+                       HasPos ? Pos : Neg);
   }
 
-  return nullptr;
+  return SDValue();
 }
 
 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
 // idioms for rotate, and if the target supports rotation instructions,
 // generate a rot[lr].
-SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
+SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
   EVT VT = LHS.getValueType();
-  if (!TLI.isTypeLegal(VT)) return nullptr;
+  if (!TLI.isTypeLegal(VT))
+    return SDValue();
 
   // The target must have at least one rotate flavor.
   bool HasROTL = hasOperation(ISD::ROTL, VT);
   bool HasROTR = hasOperation(ISD::ROTR, VT);
-  if (!HasROTL && !HasROTR) return nullptr;
+  if (!HasROTL && !HasROTR)
+    return SDValue();
 
   // Check for truncated rotate.
   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
      LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
    assert(LHS.getValueType() == RHS.getValueType());
-    if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
-      return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
-                         SDValue(Rot, 0)).getNode();
+    if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
+      return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
     }
   }
 
@@ -6192,7 +6165,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
 
   // If neither side matched a rotate half, bail
   if (!LHSShift && !RHSShift)
-    return nullptr;
+    return SDValue();
 
   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
   // side of the rotate, so try to handle that here. In all cases we need to
@@ -6215,15 +6188,15 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
 
   // If a side is still missing, nothing else we can do.
   if (!RHSShift || !LHSShift)
-    return nullptr;
+    return SDValue();
 
   // At this point we've matched or extracted a shift op on each side.
 
   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
-    return nullptr; // Not shifting the same value.
+    return SDValue(); // Not shifting the same value.
 
   if (LHSShift.getOpcode() == RHSShift.getOpcode())
-    return nullptr; // Shifts must disagree.
+    return SDValue(); // Shifts must disagree.
 
   // Canonicalize shl to left side in a shl/srl pair.
   if (RHSShift.getOpcode() == ISD::SHL) {
@@ -6267,13 +6240,13 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
     }
 
-    return Rot.getNode();
+    return Rot;
   }
 
   // If there is a mask here, and we have a variable shift, we can't be sure
   // that we're masking out the right stuff.
   if (LHSMask.getNode() || RHSMask.getNode())
-    return nullptr;
+    return SDValue();
 
   // If the shift amount is sign/zext/any-extended just peel it off.
   SDValue LExtOp0 = LHSShiftAmt;
@@ -6290,17 +6263,17 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
     RExtOp0 = RHSShiftAmt.getOperand(0);
   }
 
-  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
+  SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
   if (TryL)
     return TryL;
 
-  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
+  SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
   if (TryR)
     return TryR;
 
-  return nullptr;
+  return SDValue();
 }
 
 namespace {
@@ -6415,7 +6388,7 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
                                  Depth + 1);
   case ISD::LOAD: {
     auto L = cast<LoadSDNode>(Op.getNode());
-    if (L->isVolatile() || L->isIndexed())
+    if (!L->isSimple() || L->isIndexed())
       return None;
 
     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
@@ -6504,8 +6477,9 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
   SDValue Chain;
   SmallVector<StoreSDNode *, 8> Stores;
   for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
+    // TODO: Allow unordered atomics when wider type is legal (see D66309)
     if (Store->getMemoryVT() != MVT::i8 ||
-        Store->isVolatile() || Store->isIndexed())
+        !Store->isSimple() || Store->isIndexed())
       return SDValue();
     Stores.push_back(Store);
     Chain = Store->getChain();
@@ -6716,7 +6690,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
     return SDValue();
 
   LoadSDNode *L = P->Load;
-  assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
+  assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
+         !L->isIndexed() &&
         "Must be enforced by calculateByteProvider");
   assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
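The next hunks rename LHS/RHS to N00/N01 in visitXOR's De Morgan folds; the identities being applied are the usual ones (illustrative check, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  // fold (not (or x, y))  -> (and (not x), (not y))
  // fold (not (and x, y)) -> (or (not x), (not y))
  for (uint32_t X : {0u, 0x0f0fu, 0xffffffffu})
    for (uint32_t Y : {0u, 0x3333u, 0x80000000u}) {
      assert(~(X | Y) == (~X & ~Y));
      assert(~(X & Y) == (~X | ~Y));
    }
}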
@@ -6958,25 +6933,25 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
-    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
-    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
+    SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
+    if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
-      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
-      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
-      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
-      return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
+      N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
+      N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
+      AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
+      return DAG.getNode(NewOpcode, DL, VT, N00, N01);
     }
   }
 
   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
-    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
-    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
+    SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
+    if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
-      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
-      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
-      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
-      return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
+      N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
+      N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
+      AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
+      return DAG.getNode(NewOpcode, DL, VT, N00, N01);
     }
   }
@@ -7079,26 +7054,103 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
   return SDValue();
 }
 
+/// If we have a shift-by-constant of a bitwise logic op that itself has a
+/// shift-by-constant operand with identical opcode, we may be able to convert
+/// that into 2 independent shifts followed by the logic op. This is a
+/// throughput improvement.
+static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
+  // Match a one-use bitwise logic op.
+  SDValue LogicOp = Shift->getOperand(0);
+  if (!LogicOp.hasOneUse())
+    return SDValue();
+
+  unsigned LogicOpcode = LogicOp.getOpcode();
+  if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
+      LogicOpcode != ISD::XOR)
+    return SDValue();
+
+  // Find a matching one-use shift by constant.
+  unsigned ShiftOpcode = Shift->getOpcode();
+  SDValue C1 = Shift->getOperand(1);
+  ConstantSDNode *C1Node = isConstOrConstSplat(C1);
+  assert(C1Node && "Expected a shift with constant operand");
+  const APInt &C1Val = C1Node->getAPIntValue();
+  auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
+                             const APInt *&ShiftAmtVal) {
+    if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
+      return false;
+
+    ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
+    if (!ShiftCNode)
+      return false;
+
+    // Capture the shifted operand and shift amount value.
+    ShiftOp = V.getOperand(0);
+    ShiftAmtVal = &ShiftCNode->getAPIntValue();
+
+    // Shift amount types do not have to match their operand type, so check
+    // that the constants are the same width.
+    if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
+      return false;
+
+    // The fold is not valid if the sum of the shift values exceeds bitwidth.
+    if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
+      return false;
+
+    return true;
+  };
+
+  // Logic ops are commutative, so check each operand for a match.
+  SDValue X, Y;
+  const APInt *C0Val;
+  if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
+    Y = LogicOp.getOperand(1);
+  else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
+    Y = LogicOp.getOperand(0);
+  else
+    return SDValue();
+
+  // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
+  SDLoc DL(Shift);
+  EVT VT = Shift->getValueType(0);
+  EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
+  SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
+  SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
+  SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
+  return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
+}
+
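The identity behind combineShiftOfShiftedLogic above follows from shifts distributing over the bitwise logic ops, provided C0+C1 stays below the bit width (illustrative check, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  // shift (logic (shift X, C0), Y), C1
  //   -> logic (shift X, C0+C1), (shift Y, C1)
  for (uint32_t X : {1u, 0xdeadbeefu})
    for (uint32_t Y : {0u, 0x12345u})
      for (unsigned C0 = 0; C0 < 16; ++C0)
        for (unsigned C1 = 0; C0 + C1 < 32; ++C1) {
          assert((((X << C0) | Y) << C1) == ((X << (C0 + C1)) | (Y << C1)));
          assert((((X << C0) ^ Y) << C1) == ((X << (C0 + C1)) ^ (Y << C1)));
        }
}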
 /// Handle transforms common to the three shifts, when the shift amount is a
 /// constant.
 /// We are looking for: (shift being one of shl/sra/srl)
 ///   shift (binop X, C0), C1
 /// And want to transform into:
 ///   binop (shift X, C1), (shift C0, C1)
-SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
+  assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
+
   // Do not turn a 'not' into a regular xor.
   if (isBitwiseNot(N->getOperand(0)))
     return SDValue();
 
   // The inner binop must be one-use, since we want to replace it.
-  SDNode *LHS = N->getOperand(0).getNode();
-  if (!LHS->hasOneUse()) return SDValue();
+  SDValue LHS = N->getOperand(0);
+  if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
+    return SDValue();
+
+  // TODO: This is limited to early combining because it may reveal regressions
+  //       otherwise. But since we just checked a target hook to see if this is
+  //       desirable, that should have filtered out cases where this interferes
+  //       with some other pattern matching.
+  if (!LegalTypes)
+    if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
+      return R;
 
   // We want to pull some binops through shifts, so that we have (and (shift))
   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
   // thing happens with address calculations, so it's important to canonicalize
   // it.
-  switch (LHS->getOpcode()) {
+  switch (LHS.getOpcode()) {
   default:
     return SDValue();
   case ISD::OR:
@@ -7112,14 +7164,14 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
   }
 
   // We require the RHS of the binop to be a constant and not opaque as well.
-  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
+  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
   if (!BinOpCst)
     return SDValue();
 
   // FIXME: disable this unless the input to the binop is a shift by a
   // constant or is copy/select. Enable this in other cases when we figure
   // out it's exactly profitable.
-  SDValue BinOpLHSVal = LHS->getOperand(0);
+  SDValue BinOpLHSVal = LHS.getOperand(0);
   bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
                             BinOpLHSVal.getOpcode() == ISD::SRA ||
                             BinOpLHSVal.getOpcode() == ISD::SRL) &&
@@ -7133,24 +7185,16 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
   if (IsCopyOrSelect && N->hasOneUse())
     return SDValue();
 
-  EVT VT = N->getValueType(0);
-
-  if (!TLI.isDesirableToCommuteWithShift(N, Level))
-    return SDValue();
-
   // Fold the constants, shifting the binop RHS by the shift amount.
-  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
-                               N->getValueType(0),
-                               LHS->getOperand(1), N->getOperand(1));
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
+                               N->getOperand(1));
   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
 
-  // Create the new shift.
-  SDValue NewShift = DAG.getNode(N->getOpcode(),
-                                 SDLoc(LHS->getOperand(0)),
-                                 VT, LHS->getOperand(0), N->getOperand(1));
-
-  // Create the new binop.
-  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
+  SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
+                                 N->getOperand(1));
+  return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
 }
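visitShiftByConstant pulls a binop through a shift; for or/add with a constant RHS this is the distribution identity named in the doc comment (illustrative check, wrapping arithmetic assumed):

#include <cassert>
#include <cstdint>

int main() {
  // shift (binop X, C0), C1 -> binop (shift X, C1), (shift C0, C1)
  const uint32_t C0 = 0x00ff00ffu;
  for (uint32_t X : {0u, 0x12345678u, 0xffffffffu})
    for (unsigned C1 = 0; C1 < 32; ++C1) {
      assert(((X | C0) << C1) == ((X << C1) | (C0 << C1)));
      assert(((X + C0) << C1) == ((X << C1) + (C0 << C1))); // mod 2^32
    }
}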
 
 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
@@ -7478,7 +7522,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   }
 
   if (N1C && !N1C->isOpaque())
-    if (SDValue NewSHL = visitShiftByConstant(N, N1C))
+    if (SDValue NewSHL = visitShiftByConstant(N))
      return NewSHL;
 
   return SDValue();
@@ -7597,6 +7641,37 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
     }
   }
 
+  // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
+  //   sra (add (shl X, N1C), AddC), N1C -->
+  //   sext (add (trunc X to (width - N1C)), AddC')
+  if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
+      N0.getOperand(0).getOpcode() == ISD::SHL &&
+      N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
+    if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
+      SDValue Shl = N0.getOperand(0);
+      // Determine what the truncate's type would be and ask the target if
+      // that is a free operation.
+      LLVMContext &Ctx = *DAG.getContext();
+      unsigned ShiftAmt = N1C->getZExtValue();
+      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
+      if (VT.isVector())
+        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+
+      // TODO: The simple type check probably belongs in the default hook
+      //       implementation and/or target-specific overrides (because
+      //       non-simple types likely require masking when legalized), but
+      //       that restriction may conflict with other transforms.
+      if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) {
+        SDLoc DL(N);
+        SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
+        SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
+                             trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
+        SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
+        return DAG.getSExtOrTrunc(Add, DL, VT);
+      }
+    }
+  }
+
   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
   if (N1.getOpcode() == ISD::TRUNCATE &&
       N1.getOperand(0).getOpcode() == ISD::AND) {
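The new sra-of-add-of-shl fold above narrows the add into the truncated type. A check of the arithmetic with N1C = 16 on i32 (illustrative; assumes arithmetic right shift, as mainstream compilers provide and C++20 guarantees):

#include <cassert>
#include <cstdint>

int main() {
  // sra (add (shl X, 16), AddC), 16 == sext16(add(trunc16(X), AddC lshr 16))
  for (int32_t X : {0, 1, -1, 32767, -32768, 123456})
    for (int32_t AddC : {0, 0x1234, 0x10000, -0x230000, 0x7fff0000}) {
      int32_t Wide = (int32_t)(((uint32_t)X << 16) + (uint32_t)AddC) >> 16;
      int16_t Narrow =
          (int16_t)((int16_t)X + (int16_t)((uint32_t)AddC >> 16));
      assert(Wide == (int32_t)Narrow);
    }
}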
@@ -7638,7 +7713,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
 
   if (N1C && !N1C->isOpaque())
-    if (SDValue NewSRA = visitShiftByConstant(N, N1C))
+    if (SDValue NewSRA = visitShiftByConstant(N))
       return NewSRA;
 
   return SDValue();
@@ -7819,7 +7894,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
       return SDValue(N, 0);
 
   if (N1C && !N1C->isOpaque())
-    if (SDValue NewSRL = visitShiftByConstant(N, N1C))
+    if (SDValue NewSRL = visitShiftByConstant(N))
      return NewSRL;
 
   // Attempt to convert a srl of a load into a narrower zero-extending load.
@@ -8100,6 +8175,43 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
   }
 }
 
+/// If a (v)select has a condition value that is a sign-bit test, try to smear
+/// the condition operand sign-bit across the value width and use it as a mask.
+static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
+  SDValue Cond = N->getOperand(0);
+  SDValue C1 = N->getOperand(1);
+  SDValue C2 = N->getOperand(2);
+  assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
+         "Expected select-of-constants");
+
+  EVT VT = N->getValueType(0);
+  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
+      VT != Cond.getOperand(0).getValueType())
+    return SDValue();
+
+  // The inverted-condition + commuted-select variants of these patterns are
+  // canonicalized to these forms in IR.
+  SDValue X = Cond.getOperand(0);
+  SDValue CondC = Cond.getOperand(1);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+  if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
+      isAllOnesOrAllOnesSplat(C2)) {
+    // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
+    SDLoc DL(N);
+    SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+    return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
+  }
+  if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
+    // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
+    SDLoc DL(N);
+    SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+    return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
+  }
+  return SDValue();
+}
+
 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
   SDValue Cond = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -8148,22 +8260,36 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
     return Cond;
   }
 
-  // For any constants that differ by 1, we can transform the select into an
-  // extend and add. Use a target hook because some targets may prefer to
-  // transform in the other direction.
+  // Use a target hook because some targets may prefer to transform in the
+  // other direction.
   if (TLI.convertSelectOfConstantsToMath(VT)) {
-    if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
+    // For any constants that differ by 1, we can transform the select into
+    // an extend and add.
+    const APInt &C1Val = C1->getAPIntValue();
+    const APInt &C2Val = C2->getAPIntValue();
+    if (C1Val - 1 == C2Val) {
       // select Cond, C1, C1-1 --> add (zext Cond), C1-1
      if (VT != MVT::i1)
         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
       return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
     }
-    if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
+    if (C1Val + 1 == C2Val) {
       // select Cond, C1, C1+1 --> add (sext Cond), C1+1
       if (VT != MVT::i1)
         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
       return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
     }
+
+    // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
+    if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
+      if (VT != MVT::i1)
+        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
+      SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
+      return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
+    }
+
+    if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+      return V;
   }
 
   return SDValue();
 }
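The select-of-constants folds above are all small integer identities; a combined check (illustrative; the sign-mask forms assume arithmetic right shift):

#include <cassert>
#include <cstdint>

int main() {
  const int32_t C1 = 41;
  for (int32_t X : {-5, -1, 0, 1, 99}) {
    bool Cond = X != 0;
    // select Cond, C1, C1-1 --> add (zext Cond), C1-1
    assert((Cond ? C1 : C1 - 1) == (int32_t)Cond + (C1 - 1));
    // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
    assert((Cond ? 16 : 0) == ((int32_t)Cond << 4));
    // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
    assert((X > -1 ? C1 : -1) == ((X >> 31) | C1));
    // i32 X < 0 ? C1 : 0 --> (X >>s 31) & C1
    assert((X < 0 ? C1 : 0) == ((X >> 31) & C1));
  }
}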
@@ -8381,23 +8507,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
   return SDValue();
 }
 
-static
-std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
-  SDLoc DL(N);
-  EVT LoVT, HiVT;
-  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
-
-  // Split the inputs.
-  SDValue Lo, Hi, LL, LH, RL, RH;
-  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
-  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
-
-  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
-  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
-
-  return std::make_pair(Lo, Hi);
-}
-
 // This function assumes all the vselect's arguments are CONCAT_VECTOR
 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
@@ -8456,7 +8565,6 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
   SDValue Mask = MSC->getMask();
-  SDValue Data = MSC->getValue();
   SDValue Chain = MSC->getChain();
   SDLoc DL(N);
 
@@ -8464,123 +8572,19 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
     return Chain;
 
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
-  // If the MSCATTER data type requires splitting and the mask is provided by a
-  // SETCC, then split both nodes and its operands before legalization. This
-  // prevents the type legalizer from unrolling SETCC into scalar comparisons
-  // and enables future optimizations (e.g. min/max pattern matching on X86).
-  if (Mask.getOpcode() != ISD::SETCC)
-    return SDValue();
-
-  // Check if any splitting is required.
-  if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
-      TargetLowering::TypeSplitVector)
-    return SDValue();
-
-  SDValue MaskLo, MaskHi;
-  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
-  EVT LoVT, HiVT;
-  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
-
-  EVT MemoryVT = MSC->getMemoryVT();
-  unsigned Alignment = MSC->getOriginalAlignment();
-
-  EVT LoMemVT, HiMemVT;
-  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
-  SDValue DataLo, DataHi;
-  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
-
-  SDValue Scale = MSC->getScale();
-  SDValue BasePtr = MSC->getBasePtr();
-  SDValue IndexLo, IndexHi;
-  std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
-
-  MachineMemOperand *MMO = DAG.getMachineFunction().
-    getMachineMemOperand(MSC->getPointerInfo(),
-                         MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
-                         Alignment, MSC->getAAInfo(), MSC->getRanges());
-
-  SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
-  SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
-                                    DataLo.getValueType(), DL, OpsLo, MMO);
-
-  // The order of the Scatter operation after split is well defined. The "Hi"
-  // part comes after the "Lo". So these two operations should be chained one
-  // after another.
-  SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
-  return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
-                              DL, OpsHi, MMO);
+  return SDValue();
 }
 
 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
   SDValue Mask = MST->getMask();
-  SDValue Data = MST->getValue();
   SDValue Chain = MST->getChain();
-  EVT VT = Data.getValueType();
   SDLoc DL(N);
 
   // Zap masked stores with a zero mask.
   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
     return Chain;
 
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
-  // If the MSTORE data type requires splitting and the mask is provided by a
-  // SETCC, then split both nodes and its operands before legalization. This
This - // prevents the type legalizer from unrolling SETCC into scalar comparisons - // and enables future optimizations (e.g. min/max pattern matching on X86). - if (Mask.getOpcode() == ISD::SETCC) { - // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), VT) != - TargetLowering::TypeSplitVector) - return SDValue(); - - SDValue MaskLo, MaskHi, Lo, Hi; - std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - - SDValue Ptr = MST->getBasePtr(); - - EVT MemoryVT = MST->getMemoryVT(); - unsigned Alignment = MST->getOriginalAlignment(); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - SDValue DataLo, DataHi; - std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); - - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MST->getPointerInfo(), - MachineMemOperand::MOStore, LoMemVT.getStoreSize(), - Alignment, MST->getAAInfo(), MST->getRanges()); - - Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, - MST->isTruncatingStore(), - MST->isCompressingStore()); - - Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, - MST->isCompressingStore()); - unsigned HiOffset = LoMemVT.getStoreSize(); - - MMO = DAG.getMachineFunction().getMachineMemOperand( - MST->getPointerInfo().getWithOffset(HiOffset), - MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment, - MST->getAAInfo(), MST->getRanges()); - - Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, - MST->isTruncatingStore(), - MST->isCompressingStore()); - - AddToWorklist(Lo.getNode()); - AddToWorklist(Hi.getNode()); - - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); - } return SDValue(); } @@ -8593,76 +8597,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return CombineTo(N, MGT->getPassThru(), MGT->getChain()); - if (Level >= AfterLegalizeTypes) - return SDValue(); - - // If the MGATHER result requires splitting and the mask is provided by a - // SETCC, then split both nodes and its operands before legalization. This - // prevents the type legalizer from unrolling SETCC into scalar comparisons - // and enables future optimizations (e.g. min/max pattern matching on X86). - - if (Mask.getOpcode() != ISD::SETCC) - return SDValue(); - - EVT VT = N->getValueType(0); - - // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), VT) != - TargetLowering::TypeSplitVector) - return SDValue(); - - SDValue MaskLo, MaskHi, Lo, Hi; - std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - - SDValue PassThru = MGT->getPassThru(); - SDValue PassThruLo, PassThruHi; - std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL); - - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); - - SDValue Chain = MGT->getChain(); - EVT MemoryVT = MGT->getMemoryVT(); - unsigned Alignment = MGT->getOriginalAlignment(); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - SDValue Scale = MGT->getScale(); - SDValue BasePtr = MGT->getBasePtr(); - SDValue Index = MGT->getIndex(); - SDValue IndexLo, IndexHi; - std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); - - MachineMemOperand *MMO = DAG.getMachineFunction(). 
@@ -8593,76 +8597,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
     return CombineTo(N, MGT->getPassThru(), MGT->getChain());
 
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
-  // If the MGATHER result requires splitting and the mask is provided by a
-  // SETCC, then split both nodes and its operands before legalization. This
-  // prevents the type legalizer from unrolling SETCC into scalar comparisons
-  // and enables future optimizations (e.g. min/max pattern matching on X86).
-
-  if (Mask.getOpcode() != ISD::SETCC)
-    return SDValue();
-
-  EVT VT = N->getValueType(0);
-
-  // Check if any splitting is required.
-  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
-      TargetLowering::TypeSplitVector)
-    return SDValue();
-
-  SDValue MaskLo, MaskHi, Lo, Hi;
-  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
-  SDValue PassThru = MGT->getPassThru();
-  SDValue PassThruLo, PassThruHi;
-  std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
-
-  EVT LoVT, HiVT;
-  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
-
-  SDValue Chain = MGT->getChain();
-  EVT MemoryVT = MGT->getMemoryVT();
-  unsigned Alignment = MGT->getOriginalAlignment();
-
-  EVT LoMemVT, HiMemVT;
-  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
-  SDValue Scale = MGT->getScale();
-  SDValue BasePtr = MGT->getBasePtr();
-  SDValue Index = MGT->getIndex();
-  SDValue IndexLo, IndexHi;
-  std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
-
-  MachineMemOperand *MMO = DAG.getMachineFunction().
-    getMachineMemOperand(MGT->getPointerInfo(),
-                         MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
-                         Alignment, MGT->getAAInfo(), MGT->getRanges());
-
-  SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
-  Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
-                           MMO);
-
-  SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
-  Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
-                           MMO);
-
-  AddToWorklist(Lo.getNode());
-  AddToWorklist(Hi.getNode());
-
-  // Build a factor node to remember that this load is independent of the
-  // other one.
-  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
-                      Hi.getValue(1));
-
-  // Legalized the chain result - switch anything that used the old chain to
-  // use the new one.
-  DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
-
-  SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
-
-  SDValue RetOps[] = { GatherRes, Chain };
-  return DAG.getMergeValues(RetOps, DL);
+  return SDValue();
 }
 
 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
@@ -8674,76 +8609,6 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
 
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
-  // If the MLOAD result requires splitting and the mask is provided by a
-  // SETCC, then split both nodes and its operands before legalization. This
-  // prevents the type legalizer from unrolling SETCC into scalar comparisons
-  // and enables future optimizations (e.g. min/max pattern matching on X86).
-  if (Mask.getOpcode() == ISD::SETCC) {
-    EVT VT = N->getValueType(0);
-
-    // Check if any splitting is required.
-    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
-        TargetLowering::TypeSplitVector)
-      return SDValue();
-
-    SDValue MaskLo, MaskHi, Lo, Hi;
-    std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
-    SDValue PassThru = MLD->getPassThru();
-    SDValue PassThruLo, PassThruHi;
-    std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
-
-    EVT LoVT, HiVT;
-    std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
-
-    SDValue Chain = MLD->getChain();
-    SDValue Ptr = MLD->getBasePtr();
-    EVT MemoryVT = MLD->getMemoryVT();
-    unsigned Alignment = MLD->getOriginalAlignment();
-
-    EVT LoMemVT, HiMemVT;
-    std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
-    MachineMemOperand *MMO = DAG.getMachineFunction().
-      getMachineMemOperand(MLD->getPointerInfo(),
-                           MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
-                           Alignment, MLD->getAAInfo(), MLD->getRanges());
-
-    Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
-                           MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
-
-    Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
-                                     MLD->isExpandingLoad());
-    unsigned HiOffset = LoMemVT.getStoreSize();
-
-    MMO = DAG.getMachineFunction().getMachineMemOperand(
-        MLD->getPointerInfo().getWithOffset(HiOffset),
-        MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment,
-        MLD->getAAInfo(), MLD->getRanges());
-
-    Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
-                           MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
-
-    AddToWorklist(Lo.getNode());
-    AddToWorklist(Hi.getNode());
-
-    // Build a factor node to remember that this load is independent of the
-    // other one.
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Legalized the chain result - switch anything that used the old chain to - // use the new one. - DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain); - - SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); - - SDValue RetOps[] = { LoadRes, Chain }; - return DAG.getMergeValues(RetOps, DL); - } return SDValue(); } @@ -8791,6 +8656,18 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2); } + // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C) + APInt Pow2C; + if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() && + isNullOrNullSplat(N2)) { + SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT); + SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC); + } + + if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) + return V; + // The general case for select-of-constants: // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2 // ...but that only makes sense if a vselect is slower than 2 logic ops, so @@ -8832,13 +8709,12 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); if (isAbs) { - EVT VT = LHS.getValueType(); if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) return DAG.getNode(ISD::ABS, DL, VT, LHS); - SDValue Shift = DAG.getNode( - ISD::SRA, DL, VT, LHS, - DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT)); + SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS, + DAG.getConstant(VT.getScalarSizeInBits() - 1, + DL, getShiftAmountTy(VT))); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); @@ -8851,10 +8727,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { // This is OK if we don't care about what happens if either operand is a // NaN. 
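// Illustrative instance (assumes the no-NaN semantics noted above; operands
// are hypothetical):
//   vselect (setcc X, Y, setolt), X, Y --> fminnum X, Y
//   vselect (setcc X, Y, setogt), X, Y --> fmaxnum X, Y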
// - if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0), - N0.getOperand(1), TLI)) { - if (SDValue FMinMax = combineMinNumMaxNum( - DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG)) + if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) { + if (SDValue FMinMax = + combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG)) return FMinMax; } @@ -9209,8 +9084,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) || - !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() || - !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) + !N0.hasOneUse() || !LN0->isSimple() || + !DstVT.isVector() || !DstVT.isPow2VectorType() || + !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) return SDValue(); SmallVector<SDNode *, 4> SetCCs; @@ -9411,7 +9287,8 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); - if ((LegalOperations || LN0->isVolatile() || VT.isVector()) && + if ((LegalOperations || !LN0->isSimple() || + VT.isVector()) && !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT)) return SDValue(); @@ -9436,7 +9313,7 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()) || ((LegalOperations || VT.isVector() || - cast<LoadSDNode>(N0)->isVolatile()) && + !cast<LoadSDNode>(N0)->isSimple()) && !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))) return {}; @@ -9468,6 +9345,35 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, return SDValue(N, 0); // Return N so it doesn't get rechecked! } +static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, + const TargetLowering &TLI, EVT VT, + SDNode *N, SDValue N0, + ISD::LoadExtType ExtLoadType, + ISD::NodeType ExtOpc) { + if (!N0.hasOneUse()) + return SDValue(); + + MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0); + if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD) + return SDValue(); + + if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0))) + return SDValue(); + + if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0))) + return SDValue(); + + SDLoc dl(Ld); + SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru()); + SDValue NewLoad = DAG.getMaskedLoad(VT, dl, Ld->getChain(), + Ld->getBasePtr(), Ld->getMask(), + PassThru, Ld->getMemoryVT(), + Ld->getMemOperand(), ExtLoadType, + Ld->isExpandingLoad()); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1)); + return NewLoad; +} + static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations) { assert((N->getOpcode() == ISD::SIGN_EXTEND || @@ -9568,6 +9474,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { ISD::SEXTLOAD, ISD::SIGN_EXTEND)) return foldedExt; + if (SDValue foldedExt = + tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD, + ISD::SIGN_EXTEND)) + return foldedExt; + // fold (sext (load x)) to multiple smaller sextloads. // Only on illegal but splittable vectors. if (SDValue ExtLoad = CombineExtLoad(N)) @@ -9856,6 +9767,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { ISD::ZEXTLOAD, ISD::ZERO_EXTEND)) return foldedExt; + if (SDValue foldedExt = + tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD, + ISD::ZERO_EXTEND)) + return foldedExt; + // fold (zext (load x)) to multiple smaller zextloads. 
// Only on illegal but splittable vectors. if (SDValue ExtLoad = CombineExtLoad(N)) @@ -10340,7 +10256,10 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { return SDValue(); LoadSDNode *LN0 = cast<LoadSDNode>(N0); - if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) + // Reducing the width of a volatile load is illegal. For atomics, we may be + // able to reduce the width provided we never widen again. (see D66309) + if (!LN0->isSimple() || + !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) return SDValue(); auto AdjustBigEndianShift = [&](unsigned ShAmt) { @@ -10369,11 +10288,11 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue Load; if (ExtType == ISD::NON_EXTLOAD) - Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, + Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); else - Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr, + Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, NewAlign, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); @@ -10392,7 +10311,6 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // no larger than the source) then the useful bits of the result are // zero; we can't simply return the shortened shift, because the result // of that operation is undefined. - SDLoc DL(N0); if (ShLeftAmt >= VT.getSizeInBits()) Result = DAG.getConstant(0, DL, VT); else @@ -10513,7 +10431,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && EVT == cast<LoadSDNode>(N0)->getMemoryVT() && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() && + ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() && N0.hasOneUse()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); @@ -10530,7 +10448,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse() && EVT == cast<LoadSDNode>(N0)->getMemoryVT() && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) && TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, @@ -10757,7 +10675,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // after truncation. if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - if (!LN0->isVolatile() && + if (LN0->isSimple() && LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) { SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), @@ -11051,7 +10969,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // memory accesses. We don't care if the original type was legal or not // as we assume software couldn't rely on the number of accesses of an // illegal type. - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) || TLI.isOperationLegal(ISD::LOAD, VT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); @@ -11237,15 +11155,10 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { for (int i = 0; i != MaskScale; ++i) NewMask.push_back(M < 0 ? 
-1 : M * MaskScale + i); - bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); - if (!LegalMask) { - std::swap(SV0, SV1); - ShuffleVectorSDNode::commuteMask(NewMask); - LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); - } - - if (LegalMask) - return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask); + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG); + if (LegalShuffle) + return LegalShuffle; } return SDValue(); @@ -11998,7 +11911,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math) ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true); if (N1C && N1C->isZero()) - if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros()) + if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -12006,17 +11919,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2) - return DAG.getNode(ISD::FSUB, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), Flags); + TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize) == 2) + return DAG.getNode( + ISD::FSUB, DL, VT, N0, + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2) - return DAG.getNode(ISD::FSUB, DL, VT, N1, - GetNegatedExpression(N0, DAG, LegalOperations, - ForCodeSize), Flags); + TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize) == 2) + return DAG.getNode( + ISD::FSUB, DL, VT, N1, + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags); auto isFMulNegTwo = [](SDValue FMul) { if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL) @@ -12056,7 +11969,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // If 'unsafe math' or reassoc and nsz, fold lots of things. 
// TODO: break out portions of the transformations below for which Unsafe is // considered and which do not require both nsz and reassoc - if ((Options.UnsafeFPMath || + if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && AllowNewConst) { // fadd (fadd x, c1), c2 -> fadd x, c1 + c2 @@ -12175,7 +12088,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // (fsub A, 0) -> A if (N1CFP && N1CFP->isZero()) { - if (!N1CFP->isNegative() || Options.UnsafeFPMath || + if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) { return N0; } @@ -12195,16 +12108,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N0CFP && N0CFP->isZero()) { if (N0CFP->isNegative() || (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { - if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize)) - return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize)) + return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); } } - if ((Options.UnsafeFPMath || - (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) - && N1.getOpcode() == ISD::FADD) { + if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || + (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && + N1.getOpcode() == ISD::FADD) { // X - (X + Y) -> -Y if (N0 == N1->getOperand(0)) return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags); @@ -12214,10 +12127,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // fold (fsub A, (fneg B)) -> (fadd A, B) - if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize)) - return DAG.getNode(ISD::FADD, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), Flags); + if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize)) + return DAG.getNode( + ISD::FADD, DL, VT, N0, + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); // FSUB -> FMA combines: if (SDValue Fused = visitFSUBForFMACombine(N)) { @@ -12228,6 +12141,21 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { return SDValue(); } +/// Return true if both inputs are at least as cheap in negated form and at +/// least one input is strictly cheaper in negated form. +bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) { + if (char LHSNeg = + TLI.isNegatibleForFree(X, DAG, LegalOperations, ForCodeSize)) + if (char RHSNeg = + TLI.isNegatibleForFree(Y, DAG, LegalOperations, ForCodeSize)) + // Both negated operands are at least as cheap as their counterparts. + // Check to see if at least one is cheaper negated. 
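// Hypothetical instance: X = (fneg A) negates strictly more cheaply
// (isNegatibleForFree returns 2, since the fneg node simply disappears),
// while a constant Y negates at equal cost (returns 1), so rewriting
// -X op Y as A op (-Y) is profitable overall.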
+ if (LHSNeg == 2 || RHSNeg == 2) + return true; + + return false; +} + SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -12254,10 +12182,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags); - // fold (fmul A, 1.0) -> A - if (N1CFP && N1CFP->isExactlyValue(1.0)) - return N0; - if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -12302,21 +12226,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, N0); - // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options, - ForCodeSize)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options, - ForCodeSize)) { - // Both can be negated for free, check to see if at least one is cheaper - // negated. - if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FMUL, DL, VT, - GetNegatedExpression(N0, DAG, LegalOperations, - ForCodeSize), - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), - Flags); - } + // -N0 * -N1 --> N0 * N1 + if (isCheaperToUseNegatedFPOps(N0, N1)) { + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags); } // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X)) @@ -12395,6 +12311,15 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2); } + // (-N0 * -N1) + N2 --> (N0 * N1) + N2 + if (isCheaperToUseNegatedFPOps(N0, N1)) { + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags); + } + if (UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) return N2; @@ -12602,9 +12527,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. if (N1.getOpcode() == ISD::FSQRT) { - if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); - } } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), @@ -12645,28 +12569,16 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } // Fold into a reciprocal estimate and multiply instead of a real divide. - if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) { - AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); - } + if (SDValue RV = BuildDivEstimate(N0, N1, Flags)) + return RV; } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options, - ForCodeSize)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options, - ForCodeSize)) { - // Both can be negated for free, check to see if at least one is cheaper - // negated. 
- if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FDIV, SDLoc(N), VT, - GetNegatedExpression(N0, DAG, LegalOperations, - ForCodeSize), - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), - Flags); - } - } + if (isCheaperToUseNegatedFPOps(N0, N1)) + return DAG.getNode( + ISD::FDIV, SDLoc(N), VT, + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); return SDValue(); } @@ -13112,22 +13024,6 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { - SDValue N0 = N->getOperand(0); - EVT VT = N->getValueType(0); - EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - - // fold (fp_round_inreg c1fp) -> c1fp - if (N0CFP && isTypeLegal(EVT)) { - SDLoc DL(N); - SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT); - return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round); - } - - return SDValue(); -} - SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -13236,9 +13132,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); - if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), - &DAG.getTarget().Options, ForCodeSize)) - return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize)) + return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading // constant pool values. @@ -14004,11 +13899,12 @@ bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) { } SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { - if (OptLevel == CodeGenOpt::None || LD->isVolatile()) + if (OptLevel == CodeGenOpt::None || !LD->isSimple()) return SDValue(); SDValue Chain = LD->getOperand(0); StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode()); - if (!ST || ST->isVolatile()) + // TODO: Relax this restriction for unordered atomics (see D66309) + if (!ST || !ST->isSimple()) return SDValue(); EVT LDType = LD->getValueType(0); @@ -14107,7 +14003,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // If load is not volatile and there are no uses of the loaded value (and // the updated indexed value in case of indexed loads), change uses of the // chain value into uses of the chain input (i.e. delete the dead load). - if (!LD->isVolatile()) { + // TODO: Allow this for unordered atomics (see D66309) + if (LD->isSimple()) { if (N->getValueType(1) == MVT::Other) { // Unindexed loads. if (!N->hasAnyUseOfValue(0)) { @@ -14241,7 +14138,7 @@ struct LoadedSlice { /// Helper structure used to compute the cost of a slice. struct Cost { /// Are we optimizing for code size. - bool ForCodeSize; + bool ForCodeSize = false; /// Various cost. unsigned Loads = 0; @@ -14250,10 +14147,10 @@ struct LoadedSlice { unsigned ZExts = 0; unsigned Shift = 0; - Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {} + explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {} /// Get the cost of one isolated slice. 
- Cost(const LoadedSlice &LS, bool ForCodeSize = false) + Cost(const LoadedSlice &LS, bool ForCodeSize) : ForCodeSize(ForCodeSize), Loads(1) { EVT TruncType = LS.Inst->getValueType(0); EVT LoadedType = LS.getLoadedType(); @@ -14678,7 +14575,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { return false; LoadSDNode *LD = cast<LoadSDNode>(N); - if (LD->isVolatile() || !ISD::isNormalLoad(LD) || + if (!LD->isSimple() || !ISD::isNormalLoad(LD) || !LD->getValueType(0).isInteger()) return false; @@ -14829,13 +14726,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { else if (Chain->getOpcode() == ISD::TokenFactor && SDValue(LD, 1).hasOneUse()) { // LD has only 1 chain use so they are no indirect dependencies. - bool isOk = false; - for (const SDValue &ChainOp : Chain->op_values()) - if (ChainOp.getNode() == LD) { - isOk = true; - break; - } - if (!isOk) + if (!LD->isOperandOf(Chain.getNode())) return Result; } else return Result; // Fail. @@ -14848,7 +14739,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { /// Check to see if IVal is something that provides a value as specified by /// MaskInfo. If so, replace the specified store with a narrower store of /// truncated IVal. -static SDNode * +static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC) { @@ -14860,14 +14751,19 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, // that uses this. If not, this is not a replacement. APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), ByteShift*8, (ByteShift+NumBytes)*8); - if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr; + if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue(); // Check that it is legal on the target to do this. It is legal if the new // VT we're shrinking to (i8/i16/i32) is legal or we're still before type - // legalization. - MVT VT = MVT::getIntegerVT(NumBytes*8); + // legalization (and the target doesn't explicitly think this is a bad idea). + MVT VT = MVT::getIntegerVT(NumBytes * 8); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!DC->isTypeLegal(VT)) - return nullptr; + return SDValue(); + if (St->getMemOperand() && + !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + *St->getMemOperand())) + return SDValue(); // Okay, we can do this! Replace the 'St' store with a store of IVal that is // shifted by ByteShift and truncated down to NumBytes. @@ -14901,8 +14797,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, ++OpsNarrowed; return DAG .getStore(St->getChain(), SDLoc(St), IVal, Ptr, - St->getPointerInfo().getWithOffset(StOffset), NewAlign) - .getNode(); + St->getPointerInfo().getWithOffset(StOffset), NewAlign); } /// Look for sequence of load / op / store where op is one of 'or', 'xor', and @@ -14911,7 +14806,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, /// or code size. 
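/// Illustrative little-endian example (assumed i32 at %p):
///   x = (and (load i32 %p), 0xFF00FFFF)   ; keep every byte except byte 2
///   (store (or x, y), %p)                 ; y supplies only byte 2
/// shrinks to a one-byte update of the changed byte:
///   (store (trunc (srl y, 16) to i8), %p + 2)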
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { StoreSDNode *ST = cast<StoreSDNode>(N); - if (ST->isVolatile()) + if (!ST->isSimple()) return SDValue(); SDValue Chain = ST->getChain(); @@ -14933,16 +14828,16 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { std::pair<unsigned, unsigned> MaskedLoad; MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); if (MaskedLoad.first) - if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, Value.getOperand(1), ST,this)) - return SDValue(NewST, 0); + return NewST; // Or is commutative, so try swapping X and Y. MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); if (MaskedLoad.first) - if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, Value.getOperand(0), ST,this)) - return SDValue(NewST, 0); + return NewST; } if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || @@ -15367,14 +15262,16 @@ void DAGCombiner::getStoreMergeCandidates( // Loads must only have one use. if (!Ld->hasNUsesOfValue(1, 0)) return; - // The memory operands must not be volatile/indexed. - if (Ld->isVolatile() || Ld->isIndexed()) + // The memory operands must not be volatile/indexed/atomic. + // TODO: May be able to relax for unordered atomics (see D66309) + if (!Ld->isSimple() || Ld->isIndexed()) return; } auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr, int64_t &Offset) -> bool { - // The memory operands must not be volatile/indexed. - if (Other->isVolatile() || Other->isIndexed()) + // The memory operands must not be volatile/indexed/atomic. + // TODO: May be able to relax for unordered atomics (see D66309) + if (!Other->isSimple() || Other->isIndexed()) return false; // Don't mix temporal stores with non-temporal stores. if (St->isNonTemporal() != Other->isNonTemporal()) @@ -15394,8 +15291,10 @@ void DAGCombiner::getStoreMergeCandidates( // Loads must only have one use. if (!OtherLd->hasNUsesOfValue(1, 0)) return false; - // The memory operands must not be volatile/indexed. - if (OtherLd->isVolatile() || OtherLd->isIndexed()) + // The memory operands must not be volatile/indexed/atomic. + // TODO: May be able to relax for unordered atomics (see D66309) + if (!OtherLd->isSimple() || + OtherLd->isIndexed()) return false; // Don't mix temporal loads with non-temporal loads. if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal()) @@ -15425,6 +15324,18 @@ void DAGCombiner::getStoreMergeCandidates( return (BasePtr.equalBaseIndex(Ptr, DAG, Offset)); }; + // Check if the pair of StoreNode and the RootNode already bail out many + // times which is over the limit in dependence check. + auto OverLimitInDependenceCheck = [&](SDNode *StoreNode, + SDNode *RootNode) -> bool { + auto RootCount = StoreRootCountMap.find(StoreNode); + if (RootCount != StoreRootCountMap.end() && + RootCount->second.first == RootNode && + RootCount->second.second > StoreMergeDependenceLimit) + return true; + return false; + }; + // We looking for a root node which is an ancestor to all mergable // stores. We search up through a load, to our root and then down // through all children. 
For instance we will find Store{1,2,3} if @@ -15454,7 +15365,8 @@ void DAGCombiner::getStoreMergeCandidates( if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) { BaseIndexOffset Ptr; int64_t PtrDiff; - if (CandidateMatch(OtherST, Ptr, PtrDiff)) + if (CandidateMatch(OtherST, Ptr, PtrDiff) && + !OverLimitInDependenceCheck(OtherST, RootNode)) StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); } } else @@ -15464,7 +15376,8 @@ void DAGCombiner::getStoreMergeCandidates( if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) { BaseIndexOffset Ptr; int64_t PtrDiff; - if (CandidateMatch(OtherST, Ptr, PtrDiff)) + if (CandidateMatch(OtherST, Ptr, PtrDiff) && + !OverLimitInDependenceCheck(OtherST, RootNode)) StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); } } @@ -15522,13 +15435,24 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( // Search through DAG. We can stop early if we find a store node. for (unsigned i = 0; i < NumStores; ++i) if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist, - Max)) + Max)) { + // If the searching bail out, record the StoreNode and RootNode in the + // StoreRootCountMap. If we have seen the pair many times over a limit, + // we won't add the StoreNode into StoreNodes set again. + if (Visited.size() >= Max) { + auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode]; + if (RootCount.first == RootNode) + RootCount.second++; + else + RootCount = {RootNode, 1}; + } return false; + } return true; } bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { - if (OptLevel == CodeGenOpt::None) + if (OptLevel == CodeGenOpt::None || !EnableStoreMerging) return false; EVT MemVT = St->getMemoryVT(); @@ -15588,7 +15512,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { bool RV = false; while (StoreNodes.size() > 1) { - unsigned StartIdx = 0; + size_t StartIdx = 0; while ((StartIdx + 1 < StoreNodes.size()) && StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes != StoreNodes[StartIdx + 1].OffsetFromBase) @@ -16113,7 +16037,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { case MVT::ppcf128: return SDValue(); case MVT::f32: - if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || + if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { ; Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). @@ -16125,7 +16049,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { return SDValue(); case MVT::f64: if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && - !ST->isVolatile()) || + ST->isSimple()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { ; Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). @@ -16134,7 +16058,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { Ptr, ST->getMemOperand()); } - if (!ST->isVolatile() && + if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { // Many FP stores are not made apparent until after legalize, e.g. for // argument passing. Since this is so common, custom legalize the @@ -16181,7 +16105,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // memory accesses. We don't care if the original type was legal or not // as we assume software couldn't rely on the number of accesses of an // illegal type. 
- if (((!LegalOperations && !ST->isVolatile()) || + // TODO: May be able to relax for unordered atomics (see D66309) + if (((!LegalOperations && ST->isSimple()) || TLI.isOperationLegal(ISD::STORE, SVT)) && TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT, DAG, *ST->getMemOperand())) { @@ -16242,9 +16167,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // See if we can simplify the input to this truncstore with knowledge that // only the low bits are being used. For example: // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" - SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits); AddToWorklist(Value.getNode()); - if (Shorter) + if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits)) return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(), ST->getMemOperand()); @@ -16263,9 +16187,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // If this is a load followed by a store to the same location, then the store // is dead/noop. + // TODO: Can relax for unordered atomics (see D66309) if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) { if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && - ST->isUnindexed() && !ST->isVolatile() && + ST->isUnindexed() && ST->isSimple() && // There can't be any side effects between the load and store, such as // a call or store. Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { @@ -16274,9 +16199,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } } + // TODO: Can relax for unordered atomics (see D66309) if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) { - if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() && - !ST1->isVolatile()) { + if (ST->isUnindexed() && ST->isSimple() && + ST1->isUnindexed() && ST1->isSimple()) { if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT()) { // If this is a store followed by a store with the same value to the @@ -16405,7 +16331,8 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { break; case ISD::STORE: { StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain); - if (ST->isVolatile() || ST->isIndexed()) + // TODO: Can relax for unordered atomics (see D66309) + if (!ST->isSimple() || ST->isIndexed()) continue; const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG); // If we store purely within object bounds just before its lifetime ends, @@ -16456,6 +16383,11 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { if (OptLevel == CodeGenOpt::None) return SDValue(); + // Can't change the number of memory accesses for a volatile store or break + // atomicity for an atomic one. + if (!ST->isSimple()) + return SDValue(); + SDValue Val = ST->getValue(); SDLoc DL(ST); @@ -16531,12 +16463,52 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { } /// Convert a disguised subvector insertion into a shuffle: -/// insert_vector_elt V, (bitcast X from vector type), IdxC --> -/// bitcast(shuffle (bitcast V), (extended X), Mask) -/// Note: We do not use an insert_subvector node because that requires a legal -/// subvector type. 
SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { SDValue InsertVal = N->getOperand(1); + SDValue Vec = N->getOperand(0); + + // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex) + // --> (vector_shuffle X, Y) + if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() && + InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + isa<ConstantSDNode>(InsertVal.getOperand(1))) { + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode()); + ArrayRef<int> Mask = SVN->getMask(); + + SDValue X = Vec.getOperand(0); + SDValue Y = Vec.getOperand(1); + + // Vec's operand 0 is using indices from 0 to N-1 and + // operand 1 from N to 2N - 1, where N is the number of + // elements in the vectors. + int XOffset = -1; + if (InsertVal.getOperand(0) == X) { + XOffset = 0; + } else if (InsertVal.getOperand(0) == Y) { + XOffset = X.getValueType().getVectorNumElements(); + } + + if (XOffset != -1) { + SmallVector<int, 16> NewMask(Mask.begin(), Mask.end()); + + auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1)); + NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue(); + assert(NewMask[InsIndex] < + (int)(2 * Vec.getValueType().getVectorNumElements()) && + NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound"); + + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X, + Y, NewMask, DAG); + if (LegalShuffle) + return LegalShuffle; + } + } + + // insert_vector_elt V, (bitcast X from vector type), IdxC --> + // bitcast(shuffle (bitcast V), (extended X), Mask) + // Note: We do not use an insert_subvector node because that requires a + // legal subvector type. if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() || !InsertVal.getOperand(0).getValueType().isVector()) return SDValue(); @@ -16674,7 +16646,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { - assert(!OriginalLoad->isVolatile()); + assert(OriginalLoad->isSimple()); EVT ResultVT = EVE->getValueType(0); EVT VecEltVT = InVecVT.getVectorElementType(); @@ -16747,12 +16719,12 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; SDValue To[] = { Load, Chain }; DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + // Make sure to revisit this node to clean it up; it will usually be dead. + AddToWorklist(EVE); // Since we're explicitly calling ReplaceAllUses, add the new node to the // worklist explicitly as well. - AddToWorklist(Load.getNode()); AddUsersToWorklist(Load.getNode()); // Add users too - // Make sure to revisit this node to clean it up; it will usually be dead. - AddToWorklist(EVE); + AddToWorklist(Load.getNode()); ++OpsNarrowed; return SDValue(EVE, 0); } @@ -16982,7 +16954,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { ISD::isNormalLoad(VecOp.getNode()) && !Index->hasPredecessor(VecOp.getNode())) { auto *VecLoad = dyn_cast<LoadSDNode>(VecOp); - if (VecLoad && !VecLoad->isVolatile()) + if (VecLoad && VecLoad->isSimple()) return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad); } @@ -17041,7 +17013,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Make sure we found a non-volatile load and the extractelement is // the only use. 
- if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) + if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple()) return SDValue(); // If Idx was -1 above, Elt is going to be -1, so just return undef. @@ -17344,17 +17316,16 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) { // the shuffle mask with -1. } - // Turn this into a shuffle with zero if that's legal. - EVT VecVT = Extract.getOperand(0).getValueType(); - if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT)) - return SDValue(); - // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... --> // bitcast (shuffle V, ZeroVec, VectorMask) SDLoc DL(BV); + EVT VecVT = Extract.getOperand(0).getValueType(); SDValue ZeroVec = DAG.getConstant(0, DL, VecVT); - SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec, - ShufMask); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0), + ZeroVec, ShufMask, DAG); + if (!Shuf) + return SDValue(); return DAG.getBitcast(VT, Shuf); } @@ -17656,6 +17627,13 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } } + // A splat of a single element is a SPLAT_VECTOR if supported on the target. + if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand) + if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) { + assert(!V.isUndef() && "Splat of undef should have been handled earlier"); + return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V); + } + // Check if we can express BUILD VECTOR via subvector extract. if (!LegalTypes && (N->getNumOperands() > 1)) { SDValue Op0 = N->getOperand(0); @@ -17829,11 +17807,9 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { } } - if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT)) - return SDValue(); - - return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), - DAG.getBitcast(VT, SV1), Mask); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), + DAG.getBitcast(VT, SV1), Mask, DAG); } SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { @@ -17853,6 +17829,15 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { SDValue In = N->getOperand(0); assert(In.getValueType().isVector() && "Must concat vectors"); + // If the input is a concat_vectors, just make a larger concat by padding + // with smaller undefs. + if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) { + unsigned NumOps = N->getNumOperands() * In.getNumOperands(); + SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end()); + Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType())); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); + } + SDValue Scalar = peekThroughOneUseBitcasts(In); // concat_vectors(scalar_to_vector(scalar), undef) -> @@ -18002,6 +17987,23 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return SDValue(); } +// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find +// if the subvector can be sourced for free. 
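// For example, with SubVT = v4i32 (values A, B, W, X are hypothetical):
//   getSubVectorSrc((concat_vectors A, B), Index=4, v4i32) == B
//   getSubVectorSrc((insert_subvector W, X, Idx), Idx, v4i32) == X
// which lets the caller below fold
//   extract (binop (ins ?, X, Idx), (ins ?, Y, Idx)), Idx --> binop X, Y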
+static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) { + if (V.getOpcode() == ISD::INSERT_SUBVECTOR && + V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) { + return V.getOperand(1); + } + auto *IndexC = dyn_cast<ConstantSDNode>(Index); + if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS && + V.getOperand(0).getValueType() == SubVT && + (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) { + uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements(); + return V.getOperand(SubIdx); + } + return SDValue(); +} + static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -18010,39 +18012,29 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1) return SDValue(); + EVT VecVT = BinOp.getValueType(); SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1); - SDValue Index = Extract->getOperand(1); - EVT VT = Extract->getValueType(0); + if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType()) + return SDValue(); - // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find - // if the source subvector is the same type as the one being extracted. - auto GetSubVector = [VT, Index](SDValue V) -> SDValue { - if (V.getOpcode() == ISD::INSERT_SUBVECTOR && - V.getOperand(1).getValueType() == VT && V.getOperand(2) == Index) { - return V.getOperand(1); - } - auto *IndexC = dyn_cast<ConstantSDNode>(Index); - if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS && - V.getOperand(0).getValueType() == VT && - (IndexC->getZExtValue() % VT.getVectorNumElements()) == 0) { - uint64_t SubIdx = IndexC->getZExtValue() / VT.getVectorNumElements(); - return V.getOperand(SubIdx); - } + SDValue Index = Extract->getOperand(1); + EVT SubVT = Extract->getValueType(0); + if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT)) return SDValue(); - }; - SDValue Sub0 = GetSubVector(Bop0); - SDValue Sub1 = GetSubVector(Bop1); + + SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT); + SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT); // TODO: We could handle the case where only 1 operand is being inserted by // creating an extract of the other operand, but that requires checking // number of uses and/or costs. - if (!Sub0 || !Sub1 || !TLI.isOperationLegalOrCustom(BinOpcode, VT)) + if (!Sub0 || !Sub1) return SDValue(); // We are inserting both operands of the wide binop only to extract back // to the narrow vector size. Eliminate all of the insert/extract: // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y - return DAG.getNode(BinOpcode, SDLoc(Extract), VT, Sub0, Sub1, + return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1, BinOp->getFlags()); } @@ -18174,7 +18166,8 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0)); auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); - if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx) + if (!Ld || Ld->getExtensionType() || !Ld->isSimple() || + !ExtIdx) return SDValue(); // Allow targets to opt-out. @@ -18878,7 +18871,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // build_vector. 
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { int SplatIndex = SVN->getSplatIndex(); - if (TLI.isExtractVecEltCheap(VT, SplatIndex) && + if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) && TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) { // splat (vector_bo L, R), Index --> // splat (scalar_bo (extelt L, Index), (extelt R, Index)) @@ -19153,22 +19146,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SV1 = DAG.getUNDEF(VT); // Avoid introducing shuffles with illegal mask. - if (!TLI.isShuffleMaskLegal(Mask, VT)) { - ShuffleVectorSDNode::commuteMask(Mask); - - if (!TLI.isShuffleMaskLegal(Mask, VT)) - return SDValue(); - - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) - std::swap(SV0, SV1); - } - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) - return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask); + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) + return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG); } if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG)) @@ -19191,35 +19175,35 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1); int Elt = C0->getZExtValue(); NewMask[0] = Elt; - SDValue Val; // If we have an implict truncate do truncate here as long as it's legal. // if it's not legal, this should if (VT.getScalarType() != InVal.getValueType() && InVal.getValueType().isScalarInteger() && isTypeLegal(VT.getScalarType())) { - Val = + SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal); return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val); } if (VT.getScalarType() == InVecT.getScalarType() && - VT.getVectorNumElements() <= InVecT.getVectorNumElements() && - TLI.isShuffleMaskLegal(NewMask, VT)) { - Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec, - DAG.getUNDEF(InVecT), NewMask); - // If the initial vector is the correct size this shuffle is a - // valid result. - if (VT == InVecT) - return Val; - // If not we must truncate the vector. - if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) { - MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); - SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy); - EVT SubVT = - EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(), - VT.getVectorNumElements()); - Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val, - ZeroIdx); - return Val; + VT.getVectorNumElements() <= InVecT.getVectorNumElements()) { + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec, + DAG.getUNDEF(InVecT), NewMask, DAG); + if (LegalShuffle) { + // If the initial vector is the correct size this shuffle is a + // valid result. + if (VT == InVecT) + return LegalShuffle; + // If not we must truncate the vector. 
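// Worked instance (assumed types): for
//   (v4i32 scalar_to_vector (extract_vector_elt (v8i32 V), 2))
// the legal shuffle of V with mask <2,-1,-1,-1,-1,-1,-1,-1> is v8i32, so an
// extract_subvector at index 0 narrows it to the v4i32 result.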
+ if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) { + MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy); + EVT SubVT = + EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(), + VT.getVectorNumElements()); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, + LegalShuffle, ZeroIdx); + } } } } @@ -19627,6 +19611,39 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } } + // Make sure all but the first op are undef or constant. + auto ConcatWithConstantOrUndef = [](SDValue Concat) { + return Concat.getOpcode() == ISD::CONCAT_VECTORS && + std::all_of(std::next(Concat->op_begin()), Concat->op_end(), + [](const SDValue &Op) { + return Op.isUndef() || + ISD::isBuildVectorOfConstantSDNodes(Op.getNode()); + }); + }; + + // The following pattern is likely to emerge with vector reduction ops. Moving + // the binary operation ahead of the concat may allow using a narrower vector + // instruction that has better performance than the wide version of the op: + // VBinOp (concat X, undef/constant), (concat Y, undef/constant) --> + // concat (VBinOp X, Y), VecC + if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) && + (LHS.hasOneUse() || RHS.hasOneUse())) { + EVT NarrowVT = LHS.getOperand(0).getValueType(); + if (NarrowVT == RHS.getOperand(0).getValueType() && + TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) { + SDLoc DL(N); + unsigned NumOperands = LHS.getNumOperands(); + SmallVector<SDValue, 4> ConcatOps; + for (unsigned i = 0; i != NumOperands; ++i) { + // This constant fold for operands 1 and up. + ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i), + RHS.getOperand(i))); + } + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); + } + } + if (SDValue V = scalarizeBinOpOfSplats(N, DAG)) return V; @@ -19723,7 +19740,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // Token chains must be identical. if (LHS.getOperand(0) != RHS.getOperand(0) || // Do not let this transformation reduce the number of volatile loads. - LLD->isVolatile() || RLD->isVolatile() || + // Be conservative for atomics for the moment + // TODO: This does appear to be legal for unordered atomics (see D66309) + !LLD->isSimple() || !RLD->isSimple() || // FIXME: If either is a pre/post inc/dec load, // we'd need to split out the address adjustment. LLD->isIndexed() || RLD->isIndexed() || @@ -19928,7 +19947,7 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset( const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC) { - if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint())) + if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType())) return SDValue(); // If we are before legalize types, we want the other legalization to happen @@ -20016,8 +20035,13 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, // when the condition can be materialized as an all-ones register. Any // single bit-test can be materialized as an all-ones register with // shift-left and shift-right-arith. + // TODO: The operation legality checks could be loosened to include "custom", + // but that may cause regressions for targets that do not have shift + // instructions. 
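// Worked i32 instance (assumed operands) of the bit-test fold below:
//   select_cc seteq (and X, 4), 0, 0, A
//     --> and (sra (shl X, 29), 31), A
// Bit 2 is shifted onto the sign bit, then smeared by the arithmetic shift
// into an all-ones (bit set) or all-zero (bit clear) mask for A.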
if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && - N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) { + N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2) && + TLI.isOperationLegal(ISD::SHL, VT) && + TLI.isOperationLegal(ISD::SRA, VT)) { SDValue AndLHS = N0->getOperand(0); auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { @@ -20209,7 +20233,10 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { /// => /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form /// does not require additional intermediate precision] -SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) { +/// For the last iteration, put numerator N into it to gain more precision: +/// Result = N X_i + X_i (N - N A X_i) +SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, + SDNodeFlags Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -20230,25 +20257,39 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) { if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) { AddToWorklist(Est.getNode()); + SDLoc DL(Op); if (Iterations) { - SDLoc DL(Op); SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); - // Newton iterations: Est = Est + Est (1 - Arg * Est) + // Newton iterations: Est = Est + Est (N - Arg * Est) + // If this is the last iteration, also multiply by the numerator. for (int i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); + SDValue MulEst = Est; + + if (i == Iterations - 1) { + MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags); + AddToWorklist(MulEst.getNode()); + } + + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, + (i == Iterations - 1 ? N : FPOne), NewEst, Flags); AddToWorklist(NewEst.getNode()); NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags); + Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags); AddToWorklist(Est.getNode()); } + } else { + // If no iterations are available, multiply with N. + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags); + AddToWorklist(Est.getNode()); } + return Est; } @@ -20271,31 +20312,19 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); - AddToWorklist(HalfArg.getNode()); - HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); - AddToWorklist(HalfArg.getNode()); // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); - AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); - AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); - AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); - AddToWorklist(Est.getNode()); } // If non-reciprocal square root is requested, multiply the result by Arg. 
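// Scalar model of the refinement loop above (illustrative only; the real
// code builds SDNodes):
//   float HalfArg = 1.5f * A - A;            // 0.5 * A from one constant
//   for (unsigned i = 0; i < Iterations; ++i)
//     E = E * (1.5f - HalfArg * E * E);      // E converges to 1/sqrt(A)
// For a non-reciprocal square root, the multiply below gives A * E ~= sqrt(A).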
- if (!Reciprocal) { + if (!Reciprocal) Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); - AddToWorklist(Est.getNode()); - } return Est; } @@ -20321,13 +20350,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, // E = (E * -0.5) * ((A * E) * E + -3.0) for (unsigned i = 0; i < Iterations; ++i) { SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); - AddToWorklist(AE.getNode()); - SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); - AddToWorklist(AEE.getNode()); - SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); - AddToWorklist(RHS.getNode()); // When calculating a square root at the last iteration build: // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) @@ -20340,10 +20364,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, // SQRT: LHS = (A * E) * -0.5 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); } - AddToWorklist(LHS.getNode()); Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); - AddToWorklist(Est.getNode()); } return Est; @@ -20400,16 +20422,11 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op); SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT); Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est); - AddToWorklist(Fabs.getNode()); - AddToWorklist(IsDenorm.getNode()); - AddToWorklist(Est.getNode()); } else { // X == 0.0 ? 0.0 : Est SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est); - AddToWorklist(IsZero.getNode()); - AddToWorklist(Est.getNode()); } } } @@ -20432,6 +20449,7 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { struct MemUseCharacteristics { bool IsVolatile; + bool IsAtomic; SDValue BasePtr; int64_t Offset; Optional<int64_t> NumBytes; @@ -20447,18 +20465,20 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { : (LSN->getAddressingMode() == ISD::PRE_DEC) ? -1 * C->getSExtValue() : 0; - return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/, + return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(), + Offset /*base offset*/, Optional<int64_t>(LSN->getMemoryVT().getStoreSize()), LSN->getMemOperand()}; } if (const auto *LN = cast<LifetimeSDNode>(N)) - return {false /*isVolatile*/, LN->getOperand(1), + return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1), (LN->hasOffset()) ? LN->getOffset() : 0, (LN->hasOffset()) ? Optional<int64_t>(LN->getSize()) : Optional<int64_t>(), (MachineMemOperand *)nullptr}; // Default. - return {false /*isvolatile*/, SDValue(), (int64_t)0 /*offset*/, + return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(), + (int64_t)0 /*offset*/, Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr}; }; @@ -20474,6 +20494,11 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { if (MUC0.IsVolatile && MUC1.IsVolatile) return true; + // Be conservative about atomics for the moment + // TODO: This is way overconservative for unordered atomics (see D66309) + if (MUC0.IsAtomic && MUC1.IsAtomic) + return true; + if (MUC0.MMO && MUC1.MMO) { if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) || (MUC1.MMO->isInvariant() && MUC0.MMO->isStore())) @@ -20555,7 +20580,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallPtrSet<SDNode *, 16> Visited; // Visited node set. // Get alias information for node. 
- const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile(); + // TODO: relax aliasing for unordered atomics (see D66309) + const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple(); // Starting off. Chains.push_back(OriginalChain); @@ -20571,8 +20597,9 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, case ISD::LOAD: case ISD::STORE: { // Get alias information for C. + // TODO: Relax aliasing for unordered atomics (see D66309) bool IsOpLoad = isa<LoadSDNode>(C.getNode()) && - !cast<LSBaseSDNode>(C.getNode())->isVolatile(); + cast<LSBaseSDNode>(C.getNode())->isSimple(); if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) { // Look further up the chain. C = C.getOperand(0); @@ -20727,7 +20754,8 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { // If the chain has more than one use, then we can't reorder the mem ops. if (!SDValue(Chain, 0)->hasOneUse()) break; - if (Chain->isVolatile() || Chain->isIndexed()) + // TODO: Relax for unordered atomics (see D66309) + if (!Chain->isSimple() || Chain->isIndexed()) break; // Find the base pointer and offset for this memory node. @@ -20795,11 +20823,11 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps); CombineTo(St, TF); - AddToWorklist(STChain); - // Add TF operands worklist in reverse order. - for (auto I = TF->getNumOperands(); I;) - AddToWorklist(TF->getOperand(--I).getNode()); + // Add TF and its operands to the worklist. AddToWorklist(TF.getNode()); + for (const SDValue &Op : TF->ops()) + AddToWorklist(Op.getNode()); + AddToWorklist(STChain); return true; } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 22c23ba877e8..6d7260d7aee5 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -174,7 +174,7 @@ static unsigned findSinkableLocalRegDef(MachineInstr &MI) { if (RegDef) return 0; RegDef = MO.getReg(); - } else if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + } else if (Register::isVirtualRegister(MO.getReg())) { // This is another use of a vreg. Don't try to sink it. return 0; } @@ -1213,14 +1213,13 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { if (!FrameAlign) FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL); Flags.setByValSize(FrameSize); - Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Arg.IsNest) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); - unsigned OriginalAlignment = DL.getABITypeAlignment(Arg.Ty); - Flags.setOrigAlign(OriginalAlignment); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty))); CLI.OutVals.push_back(Arg.Val); CLI.OutFlags.push_back(Flags); @@ -1237,8 +1236,8 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs); // Set labels for heapallocsite call. 
- if (CLI.CS && CLI.CS->getInstruction()->getMetadata("heapallocsite")) { - MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite"); + if (CLI.CS && CLI.CS->getInstruction()->hasMetadata("heapallocsite")) { + const MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite"); MF->addCodeViewHeapAllocSite(CLI.Call, MD); } @@ -1303,6 +1302,7 @@ bool FastISel::selectCall(const User *I) { ExtraInfo |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) ExtraInfo |= InlineAsm::Extra_IsAlignStack; + ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::INLINEASM)) @@ -1388,9 +1388,11 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { "Expected inlined-at fields to agree"); // A dbg.declare describes the address of a source variable, so lower it // into an indirect DBG_VALUE. + auto *Expr = DI->getExpression(); + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, - *Op, DI->getVariable(), DI->getExpression()); + TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ false, + *Op, DI->getVariable(), Expr); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. @@ -1414,19 +1416,19 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { if (CI->getBitWidth() > 64) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addCImm(CI) - .addImm(0U) + .addReg(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addImm(CI->getZExtValue()) - .addImm(0U) + .addReg(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); } else if (const auto *CF = dyn_cast<ConstantFP>(V)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addFPImm(CF) - .addImm(0U) + .addReg(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); } else if (unsigned Reg = lookUpRegForValue(V)) { @@ -1453,24 +1455,12 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel()); return true; } - case Intrinsic::objectsize: { - ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1)); - unsigned long long Res = CI->isZero() ? -1ULL : 0; - Constant *ResCI = ConstantInt::get(II->getType(), Res); - unsigned ResultReg = getRegForValue(ResCI); - if (!ResultReg) - return false; - updateValueMap(II, ResultReg); - return true; - } - case Intrinsic::is_constant: { - Constant *ResCI = ConstantInt::get(II->getType(), 0); - unsigned ResultReg = getRegForValue(ResCI); - if (!ResultReg) - return false; - updateValueMap(II, ResultReg); - return true; - } + case Intrinsic::objectsize: + llvm_unreachable("llvm.objectsize.* should have been lowered already"); + + case Intrinsic::is_constant: + llvm_unreachable("llvm.is.constant.* should have been lowered already"); + case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: case Intrinsic::expect: { @@ -1677,11 +1667,11 @@ bool FastISel::selectInstruction(const Instruction *I) { /// (fall-through) successor, and update the CFG. 
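The one-line inline-asm fix above restores the asm dialect when FastISel emits INLINEASM: getDialect() is 0 for AT&T and 1 for Intel, so the multiply either leaves the Extra_AsmDialect bit clear or sets it. Hypothetical helper condensing the surrounding flag setup (illustrative name; assumes only the documented InlineAsm API):

#include "llvm/IR/InlineAsm.h"
using namespace llvm;

static unsigned encodeAsmExtraInfo(const InlineAsm *IA) {
  unsigned ExtraInfo = 0;
  if (IA->hasSideEffects())
    ExtraInfo |= InlineAsm::Extra_HasSideEffects;
  if (IA->isAlignStack())
    ExtraInfo |= InlineAsm::Extra_IsAlignStack;
  // AD_ATT == 0 leaves the dialect bit clear; AD_Intel == 1 sets it.
  ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
  return ExtraInfo;
}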
void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, const DebugLoc &DbgLoc) { - if (FuncInfo.MBB->getBasicBlock()->size() > 1 && + if (FuncInfo.MBB->getBasicBlock()->sizeWithoutDebug() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { - // For more accurate line information if this is the only instruction - // in the block then emit it, otherwise we have the unconditional - // fall-through case, which needs no instructions. + // For more accurate line information if this is the only non-debug + // instruction in the block then emit it, otherwise we have the + // unconditional fall-through case, which needs no instructions. } else { // The unconditional branch case. TII.insertBranch(*FuncInfo.MBB, MSucc, nullptr, @@ -2028,7 +2018,7 @@ unsigned FastISel::createResultReg(const TargetRegisterClass *RC) { unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op, unsigned OpNum) { - if (TargetRegisterInfo::isVirtualRegister(Op)) { + if (Register::isVirtualRegister(Op)) { const TargetRegisterClass *RegClass = TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); if (!MRI.constrainRegClass(Op, RegClass)) { @@ -2236,7 +2226,7 @@ unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); - assert(TargetRegisterInfo::isVirtualRegister(Op0) && + assert(Register::isVirtualRegister(Op0) && "Cannot yet extract from physregs"); const TargetRegisterClass *RC = MRI.getRegClass(Op0); MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); @@ -2417,10 +2407,9 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { } else return nullptr; - bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal) != nullptr; - bool IsInvariant = I->getMetadata(LLVMContext::MD_invariant_load) != nullptr; - bool IsDereferenceable = - I->getMetadata(LLVMContext::MD_dereferenceable) != nullptr; + bool IsNonTemporal = I->hasMetadata(LLVMContext::MD_nontemporal); + bool IsInvariant = I->hasMetadata(LLVMContext::MD_invariant_load); + bool IsDereferenceable = I->hasMetadata(LLVMContext::MD_dereferenceable); const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range); AAMDNodes AAInfo; diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 8b1759246b76..cf6711adad48 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -424,7 +425,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { unsigned BitWidth = IntVT.getSizeInBits(); unsigned DestReg = ValueMap[PN]; - if (!TargetRegisterInfo::isVirtualRegister(DestReg)) + if (!Register::isVirtualRegister(DestReg)) return; LiveOutRegInfo.grow(DestReg); LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg]; @@ -445,7 +446,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { assert(ValueMap.count(V) && "V should have been placed in ValueMap when its" "CopyToReg node was created."); unsigned SrcReg = ValueMap[V]; - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if 
(!Register::isVirtualRegister(SrcReg)) { DestLOI.IsValid = false; return; } @@ -480,7 +481,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { assert(ValueMap.count(V) && "V should have been placed in ValueMap when " "its CopyToReg node was created."); unsigned SrcReg = ValueMap[V]; - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (!Register::isVirtualRegister(SrcReg)) { DestLOI.IsValid = false; return; } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 9bc07d35dfc5..c5095995ec2e 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -71,7 +71,7 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses, if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1))) continue; if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1))) - if (TargetRegisterInfo::isPhysicalRegister(RN->getReg())) + if (Register::isPhysicalRegister(RN->getReg())) continue; NumImpUses = N - I; break; @@ -86,7 +86,7 @@ void InstrEmitter:: EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) { unsigned VRBase = 0; - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (Register::isVirtualRegister(SrcReg)) { // Just use the input register directly! SDValue Op(Node, ResNo); if (IsClone) @@ -114,7 +114,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == ResNo) { unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + if (Register::isVirtualRegister(DestReg)) { VRBase = DestReg; Match = false; } else if (DestReg != SrcReg) @@ -139,7 +139,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, UseRC = RC; else if (RC) { const TargetRegisterClass *ComRC = - TRI->getCommonSubClass(UseRC, RC, VT.SimpleTy); + TRI->getCommonSubClass(UseRC, RC); // If multiple uses expect disjoint register classes, we emit // copies in AddRegisterOperand. if (ComRC) @@ -219,7 +219,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(VRBase)); + assert(Register::isPhysicalRegister(VRBase)); MIB.addReg(VRBase, RegState::Define); } @@ -229,7 +229,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == i) { unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { const TargetRegisterClass *RegRC = MRI->getRegClass(Reg); if (RegRC == RC) { VRBase = Reg; @@ -272,7 +272,7 @@ unsigned InstrEmitter::getVR(SDValue Op, // does not include operand register class info. 
const TargetRegisterClass *RC = TLI->getRegClassFor( Op.getSimpleValueType(), Op.getNode()->isDivergent()); - unsigned VReg = MRI->createVirtualRegister(RC); + Register VReg = MRI->createVirtualRegister(RC); BuildMI(*MBB, InsertPos, Op.getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), VReg); return VReg; @@ -319,7 +319,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, if (!ConstrainedRC) { OpRC = TRI->getAllocatableClass(OpRC); assert(OpRC && "Constraints cannot be fulfilled for allocation"); - unsigned NewVReg = MRI->createVirtualRegister(OpRC); + Register NewVReg = MRI->createVirtualRegister(OpRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); VReg = NewVReg; @@ -385,9 +385,8 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, (IIRC && TRI->isDivergentRegClass(IIRC))) : nullptr; - if (OpRC && IIRC && OpRC != IIRC && - TargetRegisterInfo::isVirtualRegister(VReg)) { - unsigned NewVReg = MRI->createVirtualRegister(IIRC); + if (OpRC && IIRC && OpRC != IIRC && Register::isVirtualRegister(VReg)) { + Register NewVReg = MRI->createVirtualRegister(IIRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); VReg = NewVReg; @@ -465,7 +464,7 @@ unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, // register instead. RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx); assert(RC && "No legal register class for VT supports that SubIdx"); - unsigned NewReg = MRI->createVirtualRegister(RC); + Register NewReg = MRI->createVirtualRegister(RC); BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg) .addReg(VReg); return NewReg; @@ -485,7 +484,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node) { unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + if (Register::isVirtualRegister(DestReg)) { VRBase = DestReg; break; } @@ -503,7 +502,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, unsigned Reg; MachineInstr *DefMI; RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0)); - if (R && TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + if (R && Register::isPhysicalRegister(R->getReg())) { Reg = R->getReg(); DefMI = nullptr; } else { @@ -529,7 +528,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // Reg may not support a SubIdx sub-register, and we may need to // constrain its register class or issue a COPY to a compatible register // class. 
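The FunctionLoweringInfo and InstrEmitter hunks in this range are a mechanical migration from the static TargetRegisterInfo::is{Virtual,Physical}Register helpers and raw unsigned vregs to the llvm::Register wrapper. Sketch of the two spellings the diff uses (illustrative function; cf. llvm/CodeGen/Register.h):

#include "llvm/CodeGen/Register.h"
using llvm::Register;

static bool isVirtOrPhys(Register Reg) {
  // Register converts to/from the old raw unsigned, so both spellings
  // interoperate during the migration.
  bool Virt = Register::isVirtualRegister(Reg); // static form, most hunks
  return Virt || Reg.isPhysical();              // member form, EmitMachineNode
}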
- if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) Reg = ConstrainForSubReg(Reg, SubIdx, Node->getOperand(0).getSimpleValueType(), Node->isDivergent(), Node->getDebugLoc()); @@ -541,7 +540,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, MachineInstrBuilder CopyMI = BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), VRBase); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) CopyMI.addReg(Reg, 0, SubIdx); else CopyMI.addReg(TRI->getSubReg(Reg, SubIdx)); @@ -614,7 +613,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); const TargetRegisterClass *DstRC = TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx)); - unsigned NewVReg = MRI->createVirtualRegister(DstRC); + Register NewVReg = MRI->createVirtualRegister(DstRC); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); @@ -631,7 +630,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, bool IsClone, bool IsCloned) { unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); - unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); + Register NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg); unsigned NumOps = Node->getNumOperands(); @@ -649,7 +648,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1)); // Skip physical registers as they don't have a vreg to get and we'll // insert copies for them in TwoAddressInstructionPass anyway. - if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + if (!R || !Register::isPhysicalRegister(R->getReg())) { unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); @@ -678,7 +677,7 @@ MachineInstr * InstrEmitter::EmitDbgValue(SDDbgValue *SD, DenseMap<SDValue, unsigned> &VRBaseMap) { MDNode *Var = SD->getVariable(); - MDNode *Expr = SD->getExpression(); + const DIExpression *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); @@ -702,12 +701,11 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, // EmitTargetCodeForFrameDebugValue is responsible for allocation. auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) .addFrameIndex(SD->getFrameIx()); + if (SD->isIndirect()) - // Push [fi + 0] onto the DIExpression stack. - FrameMI.addImm(0); - else - // Push fi onto the DIExpression stack. - FrameMI.addReg(0); + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); + + FrameMI.addReg(0); return FrameMI.addMetadata(Var).addMetadata(Expr); } // Otherwise, we're going to create an instruction here. @@ -753,9 +751,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, // Indirect addressing is indicated by an Imm as the second parameter. 
if (SD->isIndirect()) - MIB.addImm(0U); - else - MIB.addReg(0U, RegState::Debug); + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); + + MIB.addReg(0U, RegState::Debug); MIB.addMetadata(Var); MIB.addMetadata(Expr); @@ -928,12 +926,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // // Collect all the used physreg defs, and make sure that any unused physreg // defs are marked as dead. - SmallVector<unsigned, 8> UsedRegs; + SmallVector<Register, 8> UsedRegs; // Additional results must be physical register defs. if (HasPhysRegOuts) { for (unsigned i = NumDefs; i < NumResults; ++i) { - unsigned Reg = II.getImplicitDefs()[i - NumDefs]; + Register Reg = II.getImplicitDefs()[i - NumDefs]; if (!Node->hasAnyUseOfValue(i)) continue; // This implicitly defined physreg has a use. @@ -960,8 +958,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // direct RegisterSDNode operands. for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { - unsigned Reg = R->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = R->getReg(); + if (Reg.isPhysical()) UsedRegs.push_back(Reg); } } @@ -995,8 +993,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, case ISD::CopyToReg: { unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); SDValue SrcVal = Node->getOperand(2); - if (TargetRegisterInfo::isVirtualRegister(DestReg) && - SrcVal.isMachineOpcode() && + if (Register::isVirtualRegister(DestReg) && SrcVal.isMachineOpcode() && SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Instead building a COPY to that vreg destination, build an // IMPLICIT_DEF instruction instead. @@ -1093,16 +1090,18 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // FIXME: Add dead flags for physical and virtual registers defined. // For now, mark physical register defs as implicit to help fast // regalloc. This makes inline asm look a lot like calls. - MIB.addReg(Reg, RegState::Define | - getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); + MIB.addReg(Reg, + RegState::Define | + getImplRegState(Register::isPhysicalRegister(Reg))); } break; case InlineAsm::Kind_RegDefEarlyClobber: case InlineAsm::Kind_Clobber: for (unsigned j = 0; j != NumVals; ++j, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); - MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber | - getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); + MIB.addReg(Reg, + RegState::Define | RegState::EarlyClobber | + getImplRegState(Register::isPhysicalRegister(Reg))); ECRegs.push_back(Reg); } break; @@ -1136,7 +1135,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // then remove the early-clobber flag. 
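Note that the context comment retained above ("Indirect addressing is indicated by an Imm as the second parameter.") now describes the old convention: after this change both EmitDbgValue paths fold the indirection into the DIExpression and always emit the $noreg placeholder as the second operand. Hypothetical helper condensing the new shape (illustrative name; APIs as used in the hunks):

#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/DebugInfoMetadata.h"
using namespace llvm;

static const MachineInstrBuilder &
finishDbgValue(const MachineInstrBuilder &MIB, bool IsIndirect,
               const MDNode *Var, const DIExpression *Expr) {
  if (IsIndirect) // fold the load from the variable's address into the expr
    Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
  // The second operand is now always the $noreg placeholder.
  return MIB.addReg(0U, RegState::Debug).addMetadata(Var).addMetadata(Expr);
}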
for (unsigned Reg : ECRegs) { if (MIB->readsRegister(Reg, TRI)) { - MachineOperand *MO = + MachineOperand *MO = MIB->findRegisterDefOperand(Reg, false, false, TRI); assert(MO && "No def operand for clobbered register?"); MO->setIsEarlyClobber(false); diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index bf817f00f83d..f9fdf525240f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -161,6 +162,7 @@ private: SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, const SDLoc &dl, SDValue ChainIn); SDValue ExpandBUILD_VECTOR(SDNode *Node); + SDValue ExpandSPLAT_VECTOR(SDNode *Node); SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); void ExpandDYNAMIC_STACKALLOC(SDNode *Node, SmallVectorImpl<SDValue> &Results); @@ -236,6 +238,16 @@ public: } ReplacedNode(Old); } + + void ReplaceNodeWithValue(SDValue Old, SDValue New) { + LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); + dbgs() << " with: "; New->dump(&DAG)); + + DAG.ReplaceAllUsesOfValueWith(Old, New); + if (UpdatedNodes) + UpdatedNodes->insert(New.getNode()); + ReplacedNode(Old.getNode()); + } }; } // end anonymous namespace @@ -493,8 +505,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // expand it. EVT MemVT = ST->getMemoryVT(); const DataLayout &DL = DAG.getDataLayout(); - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, - *ST->getMemOperand())) { + if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT, + *ST->getMemOperand())) { LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n"); SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); @@ -608,8 +620,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { EVT MemVT = ST->getMemoryVT(); // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, - *ST->getMemOperand())) { + if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT, + *ST->getMemOperand())) { SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); } @@ -669,8 +681,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { const DataLayout &DL = DAG.getDataLayout(); // If this is an unaligned load and the target doesn't support it, // expand it. 
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, - *LD->getMemOperand())) { + if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT, + *LD->getMemOperand())) { std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG); } break; @@ -894,11 +906,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { if (SrcVT.getScalarType() == MVT::f16) { EVT ISrcVT = SrcVT.changeTypeToInteger(); EVT IDestVT = DestVT.changeTypeToInteger(); - EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT()); + EVT ILoadVT = TLI.getRegisterType(IDestVT.getSimpleVT()); - SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT, - Chain, Ptr, ISrcVT, - LD->getMemOperand()); + SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, ILoadVT, Chain, + Ptr, ISrcVT, LD->getMemOperand()); Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result); Chain = Result.getValue(1); break; @@ -959,15 +970,13 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { #ifndef NDEBUG for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == - TargetLowering::TypeLegal || - TLI.isTypeLegal(Node->getValueType(i))) && + assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == + TargetLowering::TypeLegal && "Unexpected illegal type!"); for (const SDValue &Op : Node->op_values()) assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == TargetLowering::TypeLegal || - TLI.isTypeLegal(Op.getValueType()) || Op.getOpcode() == ISD::TargetConstant || Op.getOpcode() == ISD::Register) && "Unexpected illegal type!"); @@ -1004,7 +1013,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(0).getValueType()); break; - case ISD::FP_ROUND_INREG: case ISD::SIGN_EXTEND_INREG: { EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT(); Action = TLI.getOperationAction(Node->getOpcode(), InnerType); @@ -1097,38 +1105,15 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; } break; - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: - case ISD::STRICT_FMUL: - case ISD::STRICT_FDIV: - case ISD::STRICT_FREM: - case ISD::STRICT_FSQRT: - case ISD::STRICT_FMA: - case ISD::STRICT_FPOW: - case ISD::STRICT_FPOWI: - case ISD::STRICT_FSIN: - case ISD::STRICT_FCOS: - case ISD::STRICT_FEXP: - case ISD::STRICT_FEXP2: - case ISD::STRICT_FLOG: - case ISD::STRICT_FLOG10: - case ISD::STRICT_FLOG2: - case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: - case ISD::STRICT_FMAXNUM: - case ISD::STRICT_FMINNUM: - case ISD::STRICT_FCEIL: - case ISD::STRICT_FFLOOR: - case ISD::STRICT_FROUND: - case ISD::STRICT_FTRUNC: - case ISD::STRICT_FP_ROUND: - case ISD::STRICT_FP_EXTEND: - // These pseudo-ops get legalized as if they were their non-strict - // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT - // is also legal, but if ISD::FSQRT requires expansion then so does - // ISD::STRICT_FSQRT. + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + // These pseudo-ops are the same as the other STRICT_ ops except + // they are registered with setOperationAction() using the input type + // instead of the output type. 
Action = TLI.getStrictFPOperationAction(Node->getOpcode(), - Node->getValueType(0)); + Node->getOperand(1).getValueType()); break; case ISD::SADDSAT: case ISD::UADDSAT: @@ -1139,7 +1124,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: { + case ISD::UMULFIX: + case ISD::UMULFIXSAT: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); @@ -1650,7 +1636,6 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); NeedInvert = false; - bool NeedSwap = false; switch (TLI.getCondCodeAction(CCCode, OpVT)) { default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: @@ -1664,6 +1649,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, return true; } // Swapping operands didn't work. Try inverting the condition. + bool NeedSwap = false; InvCC = getSetCCInverse(CCCode, OpVT.isInteger()); if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { // If inverting the condition is not enough, try swapping operands @@ -2021,6 +2007,14 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { return ExpandVectorBuildThroughStack(Node); } +SDValue SelectionDAGLegalize::ExpandSPLAT_VECTOR(SDNode *Node) { + SDLoc DL(Node); + EVT VT = Node->getValueType(0); + SDValue SplatVal = Node->getOperand(0); + + return DAG.getSplatBuildVector(VT, DL, SplatVal); +} + // Expand a node into a call to a libcall. If the result value // does not fit into a register, return the lo part and set the hi part to the // by-reg argument. If it does fit into a single register, return the result @@ -2074,12 +2068,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); if (!CallInfo.second.getNode()) { - LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump()); + LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump(&DAG)); // It's a tailcall, return the chain (which is the DAG root). return DAG.getRoot(); } - LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump()); + LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump(&DAG)); return CallInfo.first; } @@ -2167,6 +2161,9 @@ SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128) { + if (Node->isStrictFPOpcode()) + Node = DAG.mutateStrictFPToFP(Node); + RTLIB::Libcall LC; switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); @@ -2815,6 +2812,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::STRICT_FP_ROUND: + // This expansion does not honor the "strict" properties anyway, + // so prefer falling back to the non-strict operation if legal. + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + break; Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getValueType(0), Node->getValueType(0), dl, Node->getOperand(0)); @@ -2829,6 +2832,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; case ISD::STRICT_FP_EXTEND: + // This expansion does not honor the "strict" properties anyway, + // so prefer falling back to the non-strict operation if legal. 
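For the ExpandSPLAT_VECTOR hunk above: SelectionDAG::getSplatBuildVector simply replicates the scalar into a BUILD_VECTOR, which is only meaningful for fixed-width vector types. Spelled out (a sketch; the real helper handles a few more details, so treat this as an approximation):

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue splatAsBuildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                  SDValue SplatVal) {
  // N copies of the scalar, one per vector lane.
  SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), SplatVal);
  return DAG.getBuildVector(VT, DL, Ops);
}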
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + break; Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getOperand(1).getValueType(), Node->getValueType(0), dl, Node->getOperand(0)); @@ -2873,19 +2882,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } - case ISD::FP_ROUND_INREG: { - // The only way we can lower this is to turn it into a TRUNCSTORE, - // EXTLOAD pair, targeting a temporary location (a stack slot). - - // NOTE: there is a choice here between constantly creating new stack - // slots and always reusing the same one. We currently always create - // new ones, as reuse may inhibit scheduling. - EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); - Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT, - Node->getValueType(0), dl); - Results.push_back(Tmp1); - break; - } case ISD::UINT_TO_FP: if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) { Results.push_back(Tmp1); @@ -2901,33 +2897,26 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) Results.push_back(Tmp1); break; + case ISD::STRICT_FP_TO_SINT: + if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) { + ReplaceNode(Node, Tmp1.getNode()); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_SINT node\n"); + return true; + } + break; case ISD::FP_TO_UINT: - if (TLI.expandFP_TO_UINT(Node, Tmp1, DAG)) + if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG)) Results.push_back(Tmp1); break; - case ISD::LROUND: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, - RTLIB::LROUND_F64, RTLIB::LROUND_F80, - RTLIB::LROUND_F128, - RTLIB::LROUND_PPCF128)); - break; - case ISD::LLROUND: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, - RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, - RTLIB::LLROUND_F128, - RTLIB::LLROUND_PPCF128)); - break; - case ISD::LRINT: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32, - RTLIB::LRINT_F64, RTLIB::LRINT_F80, - RTLIB::LRINT_F128, - RTLIB::LRINT_PPCF128)); - break; - case ISD::LLRINT: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, - RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, - RTLIB::LLRINT_F128, - RTLIB::LLRINT_PPCF128)); + case ISD::STRICT_FP_TO_UINT: + if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG)) { + // Relink the chain. + DAG.ReplaceAllUsesOfValueWith(SDValue(Node,1), Tmp2); + // Replace the new UINT result. + ReplaceNodeWithValue(SDValue(Node, 0), Tmp1); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_UINT node\n"); + return true; + } break; case ISD::VAARG: Results.push_back(DAG.expandVAArg(Node)); @@ -3348,6 +3337,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: + case ISD::UMULFIXSAT: Results.push_back(TLI.expandFixedPointMul(Node, DAG)); break; case ISD::ADDCARRY: @@ -3662,6 +3652,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::BUILD_VECTOR: Results.push_back(ExpandBUILD_VECTOR(Node)); break; + case ISD::SPLAT_VECTOR: + Results.push_back(ExpandSPLAT_VECTOR(Node)); + break; case ISD::SRA: case ISD::SRL: case ISD::SHL: { @@ -3715,6 +3708,33 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } + if (Results.empty() && Node->isStrictFPOpcode()) { + // FIXME: We were asked to expand a strict floating-point operation, + // but there is currently no expansion implemented that would preserve + // the "strict" properties. 
For now, we just fall back to the non-strict + // version if that is legal on the target. The actual mutation of the + // operation will happen in SelectionDAGISel::DoInstructionSelection. + switch (Node->getOpcode()) { + default: + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + return true; + break; + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + // These are registered by the operand type instead of the value + // type. Reflect that here. + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getOperand(1).getValueType()) + == TargetLowering::Legal) + return true; + break; + } + } + // Replace the original node with the legalized result. if (Results.empty()) { LLVM_DEBUG(dbgs() << "Cannot expand node\n"); @@ -3956,6 +3976,34 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128)); break; + case ISD::LROUND: + case ISD::STRICT_LROUND: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, + RTLIB::LROUND_F64, RTLIB::LROUND_F80, + RTLIB::LROUND_F128, + RTLIB::LROUND_PPCF128)); + break; + case ISD::LLROUND: + case ISD::STRICT_LLROUND: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, + RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, + RTLIB::LLROUND_F128, + RTLIB::LLROUND_PPCF128)); + break; + case ISD::LRINT: + case ISD::STRICT_LRINT: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32, + RTLIB::LRINT_F64, RTLIB::LRINT_F80, + RTLIB::LRINT_F128, + RTLIB::LRINT_PPCF128)); + break; + case ISD::LLRINT: + case ISD::STRICT_LLRINT: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, + RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, + RTLIB::LLRINT_F128, + RTLIB::LLRINT_PPCF128)); + break; case ISD::FDIV: Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80, RTLIB::DIV_F128, diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index b4849b2881e6..72d052473f11 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -42,10 +42,10 @@ static RTLIB::Libcall GetFPLibCall(EVT VT, } //===----------------------------------------------------------------------===// -// Convert Float Results to Integer for Non-HW-supported Operations. +// Convert Float Results to Integer //===----------------------------------------------------------------------===// -bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { +void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); dbgs() << "\n"); SDValue R = SDValue(); @@ -58,26 +58,18 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to soften the result of this operator!"); - case ISD::Register: - case ISD::CopyFromReg: - case ISD::CopyToReg: - assert(isLegalInHWReg(N->getValueType(ResNo)) && - "Unsupported SoftenFloatRes opcode!"); - // Only when isLegalInHWReg, we can skip check of the operands. 
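Two threads run through the strict-FP hunks above: the lround/llround/lrint/llrint expansions move from ExpandNode to ConvertNodeToLibcall so the STRICT_ forms can share the same libcall tables, and because those nodes produce an integer, their legalization action is registered on the FP operand type rather than the result type. A sketch of the resulting query rule (hypothetical helper name; operand 0 of a strict node is the chain):

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

static TargetLowering::LegalizeAction
queryStrictAction(const TargetLowering &TLI, const SDNode *Node) {
  switch (Node->getOpcode()) {
  case ISD::STRICT_LRINT:
  case ISD::STRICT_LLRINT:
  case ISD::STRICT_LROUND:
  case ISD::STRICT_LLROUND:
    // Registered by input type: operand 1 is the FP value.
    return TLI.getStrictFPOperationAction(
        Node->getOpcode(), Node->getOperand(1).getValueType());
  default:
    // Everything else is keyed on the result type.
    return TLI.getStrictFPOperationAction(Node->getOpcode(),
                                          Node->getValueType(0));
  }
}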
- R = SDValue(N, ResNo); - break; case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; - case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N, ResNo); break; + case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; - case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break; + case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N); break; case ISD::EXTRACT_VECTOR_ELT: R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break; - case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break; + case ISD::FABS: R = SoftenFloatRes_FABS(N); break; case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; case ISD::FADD: R = SoftenFloatRes_FADD(N); break; case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; - case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N, ResNo); break; + case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; @@ -89,7 +81,7 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMA: R = SoftenFloatRes_FMA(N); break; case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; - case ISD::FNEG: R = SoftenFloatRes_FNEG(N, ResNo); break; + case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; @@ -102,30 +94,24 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; - case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break; + case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; - case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break; - case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break; + case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; + case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break; } - if (R.getNode() && R.getNode() != N) { + // If R is null, the sub-method took care of registering the result. + if (R.getNode()) { + assert(R.getNode() != N); SetSoftenedFloat(SDValue(N, ResNo), R); - // Return true only if the node is changed, assuming that the operands - // are also converted when necessary. - return true; } - - // Otherwise, return false to tell caller to scan operands. - return false; } -SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) { - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { return BitConvertToInteger(N->getOperand(0)); } @@ -144,10 +130,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { BitConvertToInteger(N->getOperand(1))); } -SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, we can load better from the constant pool. 
- if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N) { ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N); // In ppcf128, the high 64 bits are always first in memory regardless // of Endianness. LLVM's APFloat representation is not Endian sensitive, @@ -172,19 +155,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { } SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, keep the extracted value in register. - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NewOp.getValueType().getVectorElementType(), NewOp, N->getOperand(1)); } -SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, FABS can be implemented as native bitwise operations. - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned Size = NVT.getSizeInBits(); @@ -200,57 +177,69 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMIN_F32, RTLIB::FMIN_F64, RTLIB::FMIN_F80, RTLIB::FMIN_F128, RTLIB::FMIN_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMAX_F32, RTLIB::FMAX_F64, RTLIB::FMAX_F80, RTLIB::FMAX_F128, RTLIB::FMAX_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::ADD_F32, RTLIB::ADD_F64, RTLIB::ADD_F80, RTLIB::ADD_F128, RTLIB::ADD_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { 
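From here down, every SoftenFloatRes_* gains the same boilerplate, so it is worth reading once in isolation: makeLibCall's old bare isSigned flag becomes a MakeLibCallOptions bundle, and setTypeListBeforeSoften records the pre-softening operand/result types so the target can lower the call against the original FP signature. Sketch for a binary node (hypothetical helper; note the OpsVT array must stay alive across the makeLibCall call, since MakeLibCallOptions holds an ArrayRef into it):

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

static SDValue softenBinaryLibcall(const TargetLowering &TLI,
                                   SelectionDAG &DAG, RTLIB::Libcall LC,
                                   EVT NVT, SDNode *N,
                                   ArrayRef<SDValue> SoftenedOps) {
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = {N->getOperand(0).getValueType(),
                  N->getOperand(1).getValueType()};
  // Final 'true' marks the list as the pre-softening ("before soften") types.
  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
  return TLI.makeLibCall(DAG, LC, NVT, SoftenedOps, CallOptions, SDLoc(N))
      .first;
}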
N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::CEIL_F32, RTLIB::CEIL_F64, RTLIB::CEIL_F80, RTLIB::CEIL_F128, RTLIB::CEIL_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, FCOPYSIGN can be implemented as native bitwise operations. - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(0)); SDValue RHS = BitConvertToInteger(N->getOperand(1)); SDLoc dl(N); @@ -301,98 +290,123 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) { SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::COS_F32, RTLIB::COS_F64, RTLIB::COS_F80, RTLIB::COS_F128, RTLIB::COS_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::EXP_F32, RTLIB::EXP_F64, RTLIB::EXP_F80, RTLIB::EXP_F128, RTLIB::EXP_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::EXP2_F32, RTLIB::EXP2_F64, RTLIB::EXP2_F80, RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { 
N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, RTLIB::FLOOR_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::LOG_F32, RTLIB::LOG_F64, RTLIB::LOG_F80, RTLIB::LOG_F128, RTLIB::LOG_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::LOG2_F32, RTLIB::LOG2_F64, RTLIB::LOG2_F80, RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::LOG10_F32, RTLIB::LOG10_F64, RTLIB::LOG10_F80, RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { @@ -400,48 +414,57 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)), GetSoftenedFloat(N->getOperand(2)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[3] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType(), + N->getOperand(2).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMA_F32, RTLIB::FMA_F64, RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::MUL_F32, RTLIB::MUL_F64, RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue 
DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, FNEG can be implemented as native bitwise operations. - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); - EVT FloatVT = N->getValueType(ResNo); + EVT FloatVT = N->getValueType(0); if (FloatVT == MVT::f32 || FloatVT == MVT::f64 || FloatVT == MVT::f128) { // Expand Y = FNEG(X) -> Y = X ^ sign mask APInt SignMask = APInt::getSignMask(NVT.getSizeInBits()); @@ -452,13 +475,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) { // Expand Y = FNEG(X) -> Y = SUB -0.0, X SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)), GetSoftenedFloat(N->getOperand(0)) }; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, false, dl).first; + NVT, Ops, CallOptions, dl).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { @@ -485,7 +509,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special @@ -493,15 +520,18 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32); SDValue Op = N->getOperand(0); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op, - false, SDLoc(N)).first; + CallOptions, SDLoc(N)).first; if (N->getValueType(0) == MVT::f32) return Res32; EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, Res32, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, NVT, Res32, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { @@ -515,20 +545,27 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { 
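The FNEG softening above avoids a __subsf3/__subdf3-style libcall for f32/f64/f128 by flipping the IEEE sign bit with an integer XOR (APInt::getSignMask supplies the mask); only the remaining types fall back to SUB -0.0, X. A scalar illustration of the same trick for f64 (plain C++, not part of the patch):

#include <cstdint>
#include <cstdio>
#include <cstring>

static double fnegViaXor(double X) {
  uint64_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits)); // type-pun safely via memcpy
  Bits ^= UINT64_C(1) << 63;            // the f64 sign-bit mask
  std::memcpy(&X, &Bits, sizeof(Bits));
  return X;
}

int main() {
  printf("%g %g\n", fnegViaXor(1.5), fnegViaXor(-0.0)); // prints: -1.5 0
  return 0;
}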
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); - return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::POW_F32, RTLIB::POW_F64, RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { @@ -536,87 +573,111 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { "Unsupported power type!"); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::POWI_F32, RTLIB::POWI_F64, RTLIB::POWI_F80, RTLIB::POWI_F128, RTLIB::POWI_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::REM_F32, RTLIB::REM_F64, RTLIB::REM_F80, RTLIB::REM_F128, RTLIB::REM_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::RINT_F32, RTLIB::RINT_F64, RTLIB::RINT_F80, RTLIB::RINT_F128, RTLIB::RINT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, 
GetFPLibCall(N->getValueType(0), RTLIB::ROUND_F32, RTLIB::ROUND_F64, RTLIB::ROUND_F80, RTLIB::ROUND_F128, RTLIB::ROUND_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SIN_F32, RTLIB::SIN_F64, RTLIB::SIN_F80, RTLIB::SIN_F128, RTLIB::SIN_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SQRT_F32, RTLIB::SQRT_F64, RTLIB::SQRT_F80, RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { @@ -625,17 +686,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, RTLIB::TRUNC_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { - bool LegalInHWReg = isLegalInHWReg(N->getValueType(ResNo)); +SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { LoadSDNode *L = cast<LoadSDNode>(N); EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); @@ -666,23 +729,17 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { // use the new one. 
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL); - if (LegalInHWReg) - return ExtendNode; return BitConvertToInteger(ExtendNode); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo) { - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(1)); SDValue RHS = GetSoftenedFloat(N->getOperand(2)); return DAG.getSelect(SDLoc(N), LHS.getValueType(), N->getOperand(0), LHS, RHS); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo) { - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(2)); SDValue RHS = GetSoftenedFloat(N->getOperand(3)); return DAG.getNode(ISD::SELECT_CC, SDLoc(N), @@ -736,14 +793,18 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { // Sign/zero extend the argument if the libcall takes a larger type. SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(Signed); + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - Op, Signed, dl).first; + Op, CallOptions, dl).first; } //===----------------------------------------------------------------------===// -// Convert Float Operand to Integer for Non-HW-supported Operations. +// Convert Float Operand to Integer //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { @@ -753,8 +814,6 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: - if (CanSkipSoftenFloatOperand(N, OpNo)) - return false; #ifndef NDEBUG dbgs() << "SoftenFloatOperand Op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; @@ -762,11 +821,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { llvm_unreachable("Do not know how to soften this operator's operand!"); case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; - case ISD::CopyToReg: Res = SoftenFloatOp_COPY_TO_REG(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; - case ISD::FABS: Res = SoftenFloatOp_FABS(N); break; - case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break; - case ISD::FNEG: Res = SoftenFloatOp_FNEG(N); break; case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; @@ -776,19 +831,9 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::LLROUND: Res = SoftenFloatOp_LLROUND(N); break; case ISD::LRINT: Res = SoftenFloatOp_LRINT(N); break; case ISD::LLRINT: Res = SoftenFloatOp_LLRINT(N); break; - case ISD::SELECT: Res = SoftenFloatOp_SELECT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; - case ISD::STORE: - Res = SoftenFloatOp_STORE(N, OpNo); - // Do not try to analyze or soften this node again if the value is - // or can be held in a register. In that case, Res.getNode() should - // be equal to N. 
- if (Res.getNode() == N && - isLegalInHWReg(N->getOperand(OpNo).getValueType())) - return false; - // Otherwise, we need to reanalyze and lower the new Res nodes. - break; + case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; } // If the result is null, the sub-method took care of registering results etc. @@ -800,60 +845,16 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { return true; assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && - "Invalid operand expansion"); + "Invalid operand promotion"); ReplaceValueWith(SDValue(N, 0), Res); return false; } -bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { - if (!isLegalInHWReg(N->getOperand(OpNo).getValueType())) - return false; - - // When the operand type can be kept in registers there is nothing to do for - // the following opcodes. - switch (N->getOperand(OpNo).getOpcode()) { - case ISD::BITCAST: - case ISD::ConstantFP: - case ISD::CopyFromReg: - case ISD::CopyToReg: - case ISD::FABS: - case ISD::FCOPYSIGN: - case ISD::FNEG: - case ISD::Register: - case ISD::SELECT: - case ISD::SELECT_CC: - return true; - } - - switch (N->getOpcode()) { - case ISD::ConstantFP: // Leaf node. - case ISD::CopyFromReg: // Operand is a register that we know to be left - // unchanged by SoftenFloatResult(). - case ISD::Register: // Leaf node. - return true; - } - return false; -} - SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { - return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), - GetSoftenedFloat(N->getOperand(0))); -} - -SDValue DAGTypeLegalizer::SoftenFloatOp_COPY_TO_REG(SDNode *N) { - SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); - SDValue Op2 = GetSoftenedFloat(N->getOperand(2)); - - if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2)) - return SDValue(); - - if (N->getNumOperands() == 3) - return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), 0); + SDValue Op0 = GetSoftenedFloat(N->getOperand(0)); - return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2, - N->getOperand(3)), - 0); + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0); } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { @@ -868,7 +869,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall"); - return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first; } @@ -885,7 +889,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { @@ -895,7 +902,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, 
NewLHS, NewRHS, CCCode, SDLoc(N)); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), + N->getOperand(2), N->getOperand(3)); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -911,34 +919,6 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { 0); } -SDValue DAGTypeLegalizer::SoftenFloatOp_FABS(SDNode *N) { - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - - if (Op == N->getOperand(0)) - return SDValue(); - - return SDValue(DAG.UpdateNodeOperands(N, Op), 0); -} - -SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) { - SDValue Op0 = GetSoftenedFloat(N->getOperand(0)); - SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); - - if (Op0 == N->getOperand(0) && Op1 == N->getOperand(1)) - return SDValue(); - - return SDValue(DAG.UpdateNodeOperands(N, Op0, Op1), 0); -} - -SDValue DAGTypeLegalizer::SoftenFloatOp_FNEG(SDNode *N) { - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - - if (Op == N->getOperand(0)) - return SDValue(); - - return SDValue(DAG.UpdateNodeOperands(N, Op), 0); -} - SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { bool Signed = N->getOpcode() == ISD::FP_TO_SINT; EVT SVT = N->getOperand(0).getValueType(); @@ -962,23 +942,15 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, false, dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl).first; // Truncate the result if the libcall returns a larger type. return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res); } -SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT(SDNode *N) { - SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); - SDValue Op2 = GetSoftenedFloat(N->getOperand(2)); - - if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2)) - return SDValue(); - - return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), - 0); -} - SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); @@ -986,7 +958,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), + N->getOperand(0), N->getOperand(1)); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -1009,7 +982,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), + N->getOperand(0), N->getOperand(1)); // If softenSetCCOperands returned a scalar, use it. 
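// (For instance, a predicate with a single matching libcall, such as an
// f32 SETOLT, typically comes back as the triple (__ltsf2(a, b), 0, SETLT)
// to be re-compared below, while a predicate that needs two libcalls,
// e.g. SETUEQ via __unordsf2 and __eqsf2, is already OR-combined into one
// boolean and NewRHS is left null.)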
if (!NewRHS.getNode()) { @@ -1047,13 +1021,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LROUND_F32, RTLIB::LROUND_F64, RTLIB::LROUND_F80, RTLIB::LROUND_F128, RTLIB::LROUND_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { @@ -1061,13 +1038,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLROUND_F32, RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, RTLIB::LLROUND_F128, RTLIB::LLROUND_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { @@ -1075,13 +1055,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LRINT_F32, RTLIB::LRINT_F64, RTLIB::LRINT_F80, RTLIB::LRINT_F128, RTLIB::LRINT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { @@ -1089,13 +1072,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLRINT_F32, RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, RTLIB::LLRINT_F128, RTLIB::LLRINT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } //===----------------------------------------------------------------------===// @@ -1267,13 +1253,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - N->getValueType(0), Ops, false, + N->getValueType(0), Ops, CallOptions, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1341,13 +1328,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + TargetLowering::MakeLibCallOptions CallOptions; SDValue Call = 
TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMA_F32, RTLIB::FMA_F64, RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - N->getValueType(0), Ops, false, + N->getValueType(0), Ops, CallOptions, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1355,13 +1343,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::MUL_F32, RTLIB::MUL_F64, RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - N->getValueType(0), Ops, false, + N->getValueType(0), Ops, CallOptions, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1470,13 +1459,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - N->getValueType(0), Ops, false, + N->getValueType(0), Ops, CallOptions, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1555,7 +1545,9 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); - Hi = TLI.makeLibCall(DAG, LC, VT, Src, true, dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + Hi = TLI.makeLibCall(DAG, LC, VT, Src, CallOptions, dl).first; GetPairElements(Hi, Lo, Hi); } @@ -1732,7 +1724,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), CallOptions, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { @@ -1741,8 +1734,9 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0), - false, dl).first; + CallOptions, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { @@ -1807,49 +1801,53 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) { EVT RVT = N->getValueType(0); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LROUND_F32, RTLIB::LROUND_F64, RTLIB::LROUND_F80, RTLIB::LROUND_F128, RTLIB::LROUND_PPCF128), - RVT, N->getOperand(0), false, SDLoc(N)).first; + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) { EVT RVT = N->getValueType(0); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, 
RTLIB::LLROUND_F32, RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, RTLIB::LLROUND_F128, RTLIB::LLROUND_PPCF128), - RVT, N->getOperand(0), false, SDLoc(N)).first; + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) { EVT RVT = N->getValueType(0); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LRINT_F32, RTLIB::LRINT_F64, RTLIB::LRINT_F80, RTLIB::LRINT_F128, RTLIB::LRINT_PPCF128), - RVT, N->getOperand(0), false, SDLoc(N)).first; + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) { EVT RVT = N->getValueType(0); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLRINT_F32, RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, RTLIB::LLRINT_F128, RTLIB::LLRINT_PPCF128), - RVT, N->getOperand(0), false, SDLoc(N)).first; + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; } //===----------------------------------------------------------------------===// @@ -2002,6 +2000,12 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { dbgs() << "\n"); SDValue R = SDValue(); + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) { + LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n"); + return; + } + switch (N->getOpcode()) { // These opcodes cannot appear if promotion of FP16 is done in the backend // instead of Clang diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 15ac45c37c66..d5c1b539adbd 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -100,6 +100,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { Res = PromoteIntRes_BUILD_VECTOR(N); break; case ISD::SCALAR_TO_VECTOR: Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break; + case ISD::SPLAT_VECTOR: + Res = PromoteIntRes_SPLAT_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = PromoteIntRes_CONCAT_VECTORS(N); break; @@ -112,6 +114,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break; @@ -148,9 +152,12 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UADDSAT: case ISD::SSUBSAT: case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break; + case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: Res = PromoteIntRes_MULFIX(N); break; + case ISD::UMULFIX: + case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break; + case ISD::ABS: Res = PromoteIntRes_ABS(N); break; case ISD::ATOMIC_LOAD: @@ -494,7 +501,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) NewOpc = ISD::FP_TO_SINT; - SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); + if (N->getOpcode() == ISD::STRICT_FP_TO_UINT && + !TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) && + TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT)) + NewOpc = ISD::STRICT_FP_TO_SINT; + + SDValue Res; + if (N->isStrictFPOpcode()) { + Res = 
DAG.getNode(NewOpc, dl, { NVT, MVT::Other }, + { N->getOperand(0), N->getOperand(1) }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + } else + Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); // Assert that the converted value fits in the original type. If it doesn't // (eg: because the value being converted is too big), then the result of the @@ -503,7 +523,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // NOTE: fp-to-uint to fp-to-sint promotion guarantees zero extend. For example: // before legalization: fp-to-uint16, 65534. -> 0xfffe // after legalization: fp-to-sint32, 65534. -> 0x0000fffe - return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? + return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT || + N->getOpcode() == ISD::STRICT_FP_TO_UINT) ? ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, DAG.getValueType(N->getValueType(0).getScalarType())); } @@ -590,7 +611,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { N->getIndex(), N->getScale() }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other), N->getMemoryVT(), dl, Ops, - N->getMemOperand()); + N->getMemOperand(), N->getIndexType()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -623,48 +644,84 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) { - // For promoting iN -> iM, this can be expanded by - // 1. ANY_EXTEND iN to iM - // 2. SHL by M-N - // 3. [US][ADD|SUB]SAT - // 4. L/ASHR by M-N + // If the promoted type is legal, we can convert this to: + // 1. ANY_EXTEND iN to iM + // 2. SHL by M-N + // 3. [US][ADD|SUB]SAT + // 4. L/ASHR by M-N + // Else it is more efficient to convert this to a min and a max + // operation in the higher precision arithmetic. 
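// (Worked example, assuming an i8 saddsat promoted to i32: the shift
// strategy computes ((x << 24) saddsat (y << 24)) and arithmetic-shifts
// the result back down by 24, so the i8 sign bit saturates as the i32
// sign bit; the min/max strategy instead sign-extends x and y, adds them
// exactly in i32, and clamps the sum to [-128, 127] with SMIN/SMAX.)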
SDLoc dl(N); SDValue Op1 = N->getOperand(0); SDValue Op2 = N->getOperand(1); unsigned OldBits = Op1.getScalarValueSizeInBits(); unsigned Opcode = N->getOpcode(); - unsigned ShiftOp; - switch (Opcode) { - case ISD::SADDSAT: - case ISD::SSUBSAT: - ShiftOp = ISD::SRA; - break; - case ISD::UADDSAT: - case ISD::USUBSAT: - ShiftOp = ISD::SRL; - break; - default: - llvm_unreachable("Expected opcode to be signed or unsigned saturation " - "addition or subtraction"); - } - - SDValue Op1Promoted = GetPromotedInteger(Op1); - SDValue Op2Promoted = GetPromotedInteger(Op2); + SDValue Op1Promoted, Op2Promoted; + if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) { + Op1Promoted = ZExtPromotedInteger(Op1); + Op2Promoted = ZExtPromotedInteger(Op2); + } else { + Op1Promoted = SExtPromotedInteger(Op1); + Op2Promoted = SExtPromotedInteger(Op2); + } EVT PromotedType = Op1Promoted.getValueType(); unsigned NewBits = PromotedType.getScalarSizeInBits(); - unsigned SHLAmount = NewBits - OldBits; - EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); - SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT); - Op1Promoted = - DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount); - Op2Promoted = - DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount); - SDValue Result = - DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted); - return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount); + if (TLI.isOperationLegalOrCustom(Opcode, PromotedType)) { + unsigned ShiftOp; + switch (Opcode) { + case ISD::SADDSAT: + case ISD::SSUBSAT: + ShiftOp = ISD::SRA; + break; + case ISD::UADDSAT: + case ISD::USUBSAT: + ShiftOp = ISD::SRL; + break; + default: + llvm_unreachable("Expected opcode to be signed or unsigned saturation " + "addition or subtraction"); + } + + unsigned SHLAmount = NewBits - OldBits; + EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); + SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT); + Op1Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount); + Op2Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount); + + SDValue Result = + DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount); + } else { + if (Opcode == ISD::USUBSAT) { + SDValue Max = + DAG.getNode(ISD::UMAX, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ISD::SUB, dl, PromotedType, Max, Op2Promoted); + } + + if (Opcode == ISD::UADDSAT) { + APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits); + SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); + SDValue Add = + DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax); + } + + unsigned AddOp = Opcode == ISD::SADDSAT ? 
ISD::ADD : ISD::SUB; + APInt MinVal = APInt::getSignedMinValue(OldBits).sext(NewBits); + APInt MaxVal = APInt::getSignedMaxValue(OldBits).sext(NewBits); + SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType); + SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); + SDValue Result = + DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted); + Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax); + Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin); + return Result; + } } SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { @@ -673,6 +730,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { SDValue Op1Promoted, Op2Promoted; bool Signed = N->getOpcode() == ISD::SMULFIX || N->getOpcode() == ISD::SMULFIXSAT; + bool Saturating = + N->getOpcode() == ISD::SMULFIXSAT || N->getOpcode() == ISD::UMULFIXSAT; if (Signed) { Op1Promoted = SExtPromotedInteger(N->getOperand(0)); Op2Promoted = SExtPromotedInteger(N->getOperand(1)); @@ -685,7 +744,6 @@ unsigned DiffSize = PromotedType.getScalarSizeInBits() - OldType.getScalarSizeInBits(); - bool Saturating = N->getOpcode() == ISD::SMULFIXSAT; if (Saturating) { // Promoting the operand and result values changes the saturation width, // which extends the values that we clamp to on saturation. This could be @@ -1110,6 +1168,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break; case ISD::SCALAR_TO_VECTOR: Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break; + case ISD::SPLAT_VECTOR: + Res = PromoteIntOp_SPLAT_VECTOR(N); break; case ISD::VSELECT: case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break; case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break; @@ -1148,7 +1208,8 @@ case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: Res = PromoteIntOp_MULFIX(N); break; + case ISD::UMULFIX: + case ISD::UMULFIXSAT: Res = PromoteIntOp_MULFIX(N); break; case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break; @@ -1339,6 +1400,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { GetPromotedInteger(N->getOperand(0))), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_SPLAT_VECTOR(SDNode *N) { + // Integer SPLAT_VECTOR operands are implicitly truncated, so just promote the + // operand in place. + return SDValue( + DAG.UpdateNodeOperands(N, GetPromotedInteger(N->getOperand(0))), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { assert(OpNo == 0 && "Only know how to promote the condition!"); SDValue Cond = N->getOperand(0); @@ -1454,8 +1522,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, EVT DataVT = N->getValueType(0); NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); } else if (OpNo == 4) { - // Need to sign extend the index since the bits will likely be used. - NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + // The Index + if (N->isIndexSigned()) + // Need to sign extend the index since the bits will likely be used.
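// (Example: an i32 index vector promoted to i64 on a 64-bit target. A
// signed index of -5 must stay -5 after promotion, which only sign
// extension guarantees; zero extension would turn it into a huge positive
// offset. Unsigned indices take the ZExt path below for the same reason.)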
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + else + NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo)); } else NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); @@ -1470,8 +1542,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, EVT DataVT = N->getValue().getValueType(); NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); } else if (OpNo == 4) { - // Need to sign extend the index since the bits will likely be used. - NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + // The Index + if (N->isIndexSigned()) + // Need to sign extend the index since the bits will likely be used. + NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + else + NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo)); } else NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); @@ -1715,7 +1791,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: ExpandIntRes_MULFIX(N, Lo, Hi); break; + case ISD::UMULFIX: + case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break; case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: @@ -2473,7 +2550,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, true/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, Lo, Hi); } @@ -2488,7 +2567,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, false/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, Lo, Hi); } @@ -2514,7 +2594,9 @@ void DAGTypeLegalizer::ExpandIntRes_LLROUND(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT RetVT = N->getValueType(0); - SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first, Lo, Hi); } @@ -2540,7 +2622,9 @@ void DAGTypeLegalizer::ExpandIntRes_LLRINT(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT RetVT = N->getValueType(0); - SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first, Lo, Hi); } @@ -2743,7 +2827,9 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, } SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } @@ -2777,38 +2863,53 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); uint64_t Scale = N->getConstantOperandVal(2); - bool Saturating = N->getOpcode() == ISD::SMULFIXSAT; - EVT 
BoolVT = getSetCCResultType(VT); - SDValue Zero = DAG.getConstant(0, dl, VT); + bool Saturating = (N->getOpcode() == ISD::SMULFIXSAT || + N->getOpcode() == ISD::UMULFIXSAT); + bool Signed = (N->getOpcode() == ISD::SMULFIX || + N->getOpcode() == ISD::SMULFIXSAT); + + // Handle special case when scale is equal to zero. if (!Scale) { SDValue Result; if (!Saturating) { Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); } else { - Result = DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS); + EVT BoolVT = getSetCCResultType(VT); + unsigned MulOp = Signed ? ISD::SMULO : ISD::UMULO; + Result = DAG.getNode(MulOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS); SDValue Product = Result.getValue(0); SDValue Overflow = Result.getValue(1); - - APInt MinVal = APInt::getSignedMinValue(VTSize); - APInt MaxVal = APInt::getSignedMaxValue(VTSize); - SDValue SatMin = DAG.getConstant(MinVal, dl, VT); - SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); - SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT); - Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin); - Result = DAG.getSelect(dl, VT, Overflow, Result, Product); + if (Signed) { + APInt MinVal = APInt::getSignedMinValue(VTSize); + APInt MaxVal = APInt::getSignedMaxValue(VTSize); + SDValue SatMin = DAG.getConstant(MinVal, dl, VT); + SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT); + Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin); + Result = DAG.getSelect(dl, VT, Overflow, Result, Product); + } else { + // For unsigned multiplication, we only need to check the max since we + // can't really overflow towards zero. + APInt MaxVal = APInt::getMaxValue(VTSize); + SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); + Result = DAG.getSelect(dl, VT, Overflow, SatMax, Product); + } } SplitInteger(Result, Lo, Hi); return; } + // For SMULFIX[SAT] we only expect to find Scale<VTSize, but this assert will + // cover for unhandled cases below, while still being valid for UMULFIX[SAT]. + assert(Scale <= VTSize && "Scale can't be larger than the value type size."); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue LL, LH, RL, RH; GetExpandedInteger(LHS, LL, LH); GetExpandedInteger(RHS, RL, RH); SmallVector<SDValue, 4> Result; - bool Signed = (N->getOpcode() == ISD::SMULFIX || - N->getOpcode() == ISD::SMULFIXSAT); unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI; if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG, TargetLowering::MulExpansionKind::OnlyLegalOrCustom, @@ -2822,19 +2923,9 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, "the size of the current value type"); EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); - // Shift whole amount by scale. - SDValue ResultLL = Result[0]; - SDValue ResultLH = Result[1]; - SDValue ResultHL = Result[2]; - SDValue ResultHH = Result[3]; - - SDValue SatMax, SatMin; - SDValue NVTZero = DAG.getConstant(0, dl, NVT); - SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT); - EVT BoolNVT = getSetCCResultType(NVT); - - // After getting the multplication result in 4 parts, we need to perform a + // After getting the multiplication result in 4 parts, we need to perform a // shift right by the amount of the scale to get the result in that scale. + // // Let's say we multiply 2 64 bit numbers. 
The resulting value can be held in // 128 bits that are cut into 4 32-bit parts: // // |NVTSize-| // - // The resulting Lo and Hi will only need to be one of these 32-bit parts - // after shifting. + // The resulting Lo and Hi would normally be in LL and LH after the shift. But + // to avoid unnecessary shifting of all 4 parts, we can adjust the shift + // amount and get Lo and Hi using two funnel shifts. Or for the special case + // when Scale is a multiple of NVTSize we can just pick the result without + // shifting. + uint64_t Part0 = Scale / NVTSize; // Part holding lowest bit needed. + if (Scale % NVTSize) { + SDValue ShiftAmount = DAG.getConstant(Scale % NVTSize, dl, ShiftTy); + Lo = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 1], Result[Part0], + ShiftAmount); + Hi = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 2], Result[Part0 + 1], + ShiftAmount); + } else { + Lo = Result[Part0]; + Hi = Result[Part0 + 1]; + } + + // Unless saturation is requested we are done. The result is in <Hi,Lo>. + if (!Saturating) + return; + + // Cannot overflow when there is no integer part. + if (Scale == VTSize) + return; + + // To handle saturation we must check for overflow in the multiplication. + // + // Unsigned overflow happened if the upper (VTSize - Scale) bits (of Result) + // aren't all zeroes. + // + // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of Result) + // aren't all ones or all zeroes. + // + // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the + // highest bit of HH determines saturation direction in the event of signed + // saturation. + + SDValue ResultHL = Result[2]; + SDValue ResultHH = Result[3]; + + SDValue SatMax, SatMin; + SDValue NVTZero = DAG.getConstant(0, dl, NVT); + SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT); + EVT BoolNVT = getSetCCResultType(NVT); + + if (!Signed) { + if (Scale < NVTSize) { + // Overflow happened if ((HH | (HL >> Scale)) != 0). + SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, + DAG.getConstant(Scale, dl, ShiftTy)); + SDValue Tmp = DAG.getNode(ISD::OR, dl, NVT, HLAdjusted, ResultHH); + SatMax = DAG.getSetCC(dl, BoolNVT, Tmp, NVTZero, ISD::SETNE); + } else if (Scale == NVTSize) { + // Overflow happened if (HH != 0). + SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETNE); + } else if (Scale < VTSize) { + // Overflow happened if ((HH >> (Scale - NVTSize)) != 0). + SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, + DAG.getConstant(Scale - NVTSize, dl, + ShiftTy)); + SatMax = DAG.getSetCC(dl, BoolNVT, HLAdjusted, NVTZero, ISD::SETNE); + } else + llvm_unreachable("Scale must be less than or equal to VTSize for UMULFIXSAT " "(and saturation can't happen with Scale==VTSize)."); + + Hi = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Hi); + Lo = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Lo); + return; + } + if (Scale < NVTSize) {
- SDValue SRLAmnt = DAG.getConstant(Scale, dl, ShiftTy); - SDValue SHLAmnt = DAG.getConstant(NVTSize - Scale, dl, ShiftTy); - Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLL, SRLAmnt); - Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, - DAG.getNode(ISD::SHL, dl, NVT, ResultLH, SHLAmnt)); - Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt); - Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, - DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt)); - - // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the - // highest bit of HH determines saturation direction in the event of - // saturation. // The number of overflow bits we can check are VTSize - Scale + 1 (we // include the sign bit). If these top bits are > 0, then we overflowed past // the max value. If these top bits are < -1, then we overflowed past the // min value. Otherwise, we did not overflow. - if (Saturating) { - unsigned OverflowBits = VTSize - Scale + 1; - assert(OverflowBits <= VTSize && OverflowBits > NVTSize && - "Extent of overflow bits must start within HL"); - SDValue HLHiMask = DAG.getConstant( - APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT); - SDValue HLLoMask = DAG.getConstant( - APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT); - - // HH > 0 or HH == 0 && HL > HLLoMask - SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); - SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); - SDValue HLPos = - DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT); - SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos, - DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLPos)); - - // HH < -1 or HH == -1 && HL < HLHiMask - SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); - SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); - SDValue HLNeg = - DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT); - SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg, - DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLNeg)); - } + unsigned OverflowBits = VTSize - Scale + 1; + assert(OverflowBits <= VTSize && OverflowBits > NVTSize && + "Extent of overflow bits must start within HL"); + SDValue HLHiMask = DAG.getConstant( + APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT); + SDValue HLLoMask = DAG.getConstant( + APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT); + // We overflow max if HH > 0 or (HH == 0 && HL > HLLoMask). + SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); + SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); + SDValue HLUGT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT); + SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0, + DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLUGT)); + // We overflow min if HH < -1 or (HH == -1 && HL < HLHiMask). + SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); + SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); + SDValue HLULT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT); + SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT, + DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLULT)); } else if (Scale == NVTSize) { - // If the scales are equal, Lo and Hi are ResultLH and Result HL, - // respectively. Avoid shifting to prevent undefined behavior. - Lo = ResultLH; - Hi = ResultHL; - - // We overflow max if HH > 0 or HH == 0 && HL sign bit is 1. - // We overflow min if HH < -1 or HH == -1 && HL sign bit is 0. 
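// (To see why these conditions are exact, take a 64-bit smulfixsat with
// Scale == 32 expanded into 32-bit parts: the kept result <HL,LH> is the
// 128-bit product shifted right by 32, so it is representable iff the 33
// bits above it, HH together with the sign bit of HL, form a pure sign
// extension; any other pattern means the true product lies outside the
// signed 64-bit range and must be clamped in the direction given by HH.)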
- if (Saturating) { - SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); - SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); - SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT); - SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos, - DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLNeg)); - - SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); - SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); - SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE); - SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg, - DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLPos)); - } + // We overflow max if HH > 0 or (HH == 0 && HL sign bit is 1). + SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); + SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); + SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT); + SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0, + DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLNeg)); + // We overflow min if HH < -1 or (HH == -1 && HL sign bit is 0). + SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); + SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); + SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE); + SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT, + DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLPos)); } else if (Scale < VTSize) { - // If the scale is instead less than the old VT size, but greater than or - // equal to the expanded VT size, the first part of the result (ResultLL) is - // no longer a part of Lo because it would be scaled out anyway. Instead we - // can start shifting right from the fourth part (ResultHH) to the second - // part (ResultLH), and Result LH will be the new Lo. - SDValue SRLAmnt = DAG.getConstant(Scale - NVTSize, dl, ShiftTy); - SDValue SHLAmnt = DAG.getConstant(VTSize - Scale, dl, ShiftTy); - Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt); - Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, - DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt)); - Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, SRLAmnt); - Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, - DAG.getNode(ISD::SHL, dl, NVT, ResultHH, SHLAmnt)); - // This is similar to the case when we saturate if Scale < NVTSize, but we - // only need to chech HH. - if (Saturating) { - unsigned OverflowBits = VTSize - Scale + 1; - SDValue HHHiMask = DAG.getConstant( - APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT); - SDValue HHLoMask = DAG.getConstant( - APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT); - - SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT); - SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT); - } - } else if (Scale == VTSize) { - assert( - !Signed && - "Only unsigned types can have a scale equal to the operand bit width"); - - Lo = ResultHL; - Hi = ResultHH; - } else { - llvm_unreachable("Expected the scale to be less than or equal to the width " - "of the operands"); - } + // only need to check HH. 
+ unsigned OverflowBits = VTSize - Scale + 1; + SDValue HHHiMask = DAG.getConstant( + APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT); + SDValue HHLoMask = DAG.getConstant( + APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT); + SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT); + SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT); + } else + llvm_unreachable("Illegal scale for signed fixed point mul."); - if (Saturating) { - APInt LHMax = APInt::getSignedMaxValue(NVTSize); - APInt LLMax = APInt::getAllOnesValue(NVTSize); - APInt LHMin = APInt::getSignedMinValue(NVTSize); - Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LHMax, dl, NVT), Hi); - Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(LHMin, dl, NVT), Hi); - Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LLMax, dl, NVT), Lo); - Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo); - } + // Saturate to signed maximum. + APInt MaxHi = APInt::getSignedMaxValue(NVTSize); + APInt MaxLo = APInt::getAllOnesValue(NVTSize); + Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxHi, dl, NVT), Hi); + Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxLo, dl, NVT), Lo); + // Saturate to signed minimum. + APInt MinHi = APInt::getSignedMinValue(NVTSize); + Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(MinHi, dl, NVT), Hi); + Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo); } void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, @@ -3030,7 +3133,9 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, LC = RTLIB::SDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, @@ -3129,7 +3234,9 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, isSigned, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(isSigned); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); return; } @@ -3217,7 +3324,9 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, LC = RTLIB::SREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, @@ -3373,7 +3482,8 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, LC = RTLIB::UDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, @@ -3399,7 +3509,8 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, LC = RTLIB::UREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + 
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, @@ -3759,7 +3870,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, Op, true, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -3924,7 +4037,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this UINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, Op, true, dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, dl).first; } SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { @@ -4033,6 +4148,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op); } +SDValue DAGTypeLegalizer::PromoteIntRes_SPLAT_VECTOR(SDNode *N) { + SDLoc dl(N); + + SDValue SplatVal = N->getOperand(0); + + assert(!SplatVal.getValueType().isVector() && "Input must be a scalar"); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "Type must be promoted to a vector type"); + EVT NOutElemVT = NOutVT.getVectorElementType(); + + SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, SplatVal); + + return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op); +} + SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { SDLoc dl(N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 14fd5be23ccb..b596c174a287 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -81,7 +81,6 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) { SDValue Res(&Node, i); - EVT VT = Res.getValueType(); bool Failed = false; // Don't create a value in map. auto ResId = (ValueToIdMap.count(Res)) ? ValueToIdMap[Res] : 0; @@ -135,17 +134,13 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { dbgs() << "Unprocessed value in a map!"; Failed = true; } - } else if (isTypeLegal(VT) || IgnoreNodeResults(&Node)) { + } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) { if (Mapped > 1) { dbgs() << "Value with legal type was transformed!"; Failed = true; } } else { - // If the value can be kept in HW registers, softening machinery can - // leave it unchanged and don't put it to any map. - if (Mapped == 0 && - !(getTypeAction(VT) == TargetLowering::TypeSoftenFloat && - isLegalInHWReg(VT))) { + if (Mapped == 0) { dbgs() << "Processed value not in any map!"; Failed = true; } else if (Mapped & (Mapped - 1)) { @@ -257,13 +252,9 @@ bool DAGTypeLegalizer::run() { Changed = true; goto NodeDone; case TargetLowering::TypeSoftenFloat: - Changed = SoftenFloatResult(N, i); - if (Changed) - goto NodeDone; - // If not changed, the result type should be legally in register. 
- assert(isLegalInHWReg(ResultVT) && - "Unchanged SoftenFloatResult should be legal in register!"); - goto ScanOperands; + SoftenFloatResult(N, i); + Changed = true; + goto NodeDone; case TargetLowering::TypeExpandFloat: ExpandFloatResult(N, i); Changed = true; @@ -439,15 +430,9 @@ NodeDone: bool Failed = false; // Check that all result types are legal. - // A value type is illegal if its TypeAction is not TypeLegal, - // and TLI.RegClassForVT does not have a register class for this type. - // For example, the x86_64 target has f128 that is not TypeLegal, - // to have softened operators, but it also has FR128 register class to - // pass and return f128 values. Hence a legalized node can have f128 type. if (!IgnoreNodeResults(&Node)) for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i) - if (!isTypeLegal(Node.getValueType(i)) && - !TLI.isTypeLegal(Node.getValueType(i))) { + if (!isTypeLegal(Node.getValueType(i))) { dbgs() << "Result type " << i << " illegal: "; Node.dump(&DAG); Failed = true; @@ -456,8 +441,7 @@ NodeDone: // Check that all operand types are legal. for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i) if (!IgnoreNodeResults(Node.getOperand(i).getNode()) && - !isTypeLegal(Node.getOperand(i).getValueType()) && - !TLI.isTypeLegal(Node.getOperand(i).getValueType())) { + !isTypeLegal(Node.getOperand(i).getValueType())) { dbgs() << "Operand type " << i << " illegal: "; Node.getOperand(i).dump(&DAG); Failed = true; @@ -713,23 +697,13 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { - // f128 of x86_64 could be kept in SSE registers, - // but sometimes softened to i128. - assert((Result.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) || - Op.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && "Invalid type for softened float"); AnalyzeNewValue(Result); auto &OpIdEntry = SoftenedFloats[getTableId(Op)]; - // Allow repeated calls to save f128 type nodes - // or any node with type that transforms to itself. - // Many operations on these types are not softened. - assert(((OpIdEntry == 0) || - Op.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && - "Node is already converted to integer!"); + assert((OpIdEntry == 0) && "Node is already converted to integer!"); OpIdEntry = getTableId(Result); } @@ -1003,25 +977,27 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { /// Convert the node into a libcall with the same prototype. 
SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned) { + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(isSigned); unsigned NumOps = N->getNumOperands(); SDLoc dl(N); if (NumOps == 0) { - return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, CallOptions, dl).first; } else if (NumOps == 1) { SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, CallOptions, dl).first; } else if (NumOps == 2) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, dl).first; } SmallVector<SDValue, 8> Ops(NumOps); for (unsigned i = 0; i < NumOps; ++i) Ops[i] = N->getOperand(i); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first; + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, dl).first; } /// Expand a node into a call to a libcall. Similar to ExpandLibCall except that diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 1d489b1b3a33..4afbae69128a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -73,15 +73,6 @@ private: return VT.isSimple() && TLI.isTypeLegal(VT); } - /// Return true if this type can be passed in registers. - /// For example, x86_64's f128, should to be legally in registers - /// and only some operations converted to library calls or integer - /// bitwise operations. - bool isLegalInHWReg(EVT VT) const { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - return VT == NVT && isSimpleLegalType(VT); - } - EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } @@ -306,6 +297,7 @@ private: SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N); SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N); SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N); @@ -363,6 +355,7 @@ private: SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N); SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); @@ -472,14 +465,11 @@ private: // Float to Integer Conversion Support: LegalizeFloatTypes.cpp //===--------------------------------------------------------------------===// - /// Given an operand Op of Float type, returns the integer if the Op is not - /// supported in target HW and converted to the integer. - /// The integer contains exactly the same bits as Op - only the type changed. - /// For example, if Op is an f32 which was softened to an i32, then this - /// method returns an i32, the bits of which coincide with those of Op. - /// If the Op can be efficiently supported in target HW or the operand must - /// stay in a register, the Op is not converted to an integer. - /// In that case, the given op is returned. 
+ /// GetSoftenedFloat - Given a processed operand Op which was converted to an + /// integer of the same size, this returns the integer. The integer contains + /// exactly the same bits as Op - only the type changed. For example, if Op + /// is an f32 which was softened to an i32, then this method returns an i32, + /// the bits of which coincide with those of Op SDValue GetSoftenedFloat(SDValue Op) { TableId Id = getTableId(Op); auto Iter = SoftenedFloats.find(Id); @@ -494,19 +484,19 @@ private: } void SetSoftenedFloat(SDValue Op, SDValue Result); - // Convert Float Results to Integer for Non-HW-supported Operations. - bool SoftenFloatResult(SDNode *N, unsigned ResNo); + // Convert Float Results to Integer. + void SoftenFloatResult(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_BITCAST(SDNode *N); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); - SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_ConstantFP(SDNode *N); SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_FABS(SDNode *N); SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FADD(SDNode *N); SDValue SoftenFloatRes_FCEIL(SDNode *N); - SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); SDValue SoftenFloatRes_FCOS(SDNode *N); SDValue SoftenFloatRes_FDIV(SDNode *N); SDValue SoftenFloatRes_FEXP(SDNode *N); @@ -518,7 +508,7 @@ private: SDValue SoftenFloatRes_FMA(SDNode *N); SDValue SoftenFloatRes_FMUL(SDNode *N); SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); - SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_FNEG(SDNode *N); SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N); SDValue SoftenFloatRes_FP_ROUND(SDNode *N); @@ -531,27 +521,17 @@ private: SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); SDValue SoftenFloatRes_FTRUNC(SDNode *N); - SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_LOAD(SDNode *N); + SDValue SoftenFloatRes_SELECT(SDNode *N); + SDValue SoftenFloatRes_SELECT_CC(SDNode *N); SDValue SoftenFloatRes_UNDEF(SDNode *N); SDValue SoftenFloatRes_VAARG(SDNode *N); SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); - // Return true if we can skip softening the given operand or SDNode because - // either it was soften before by SoftenFloatResult and references to the - // operand were replaced by ReplaceValueWith or it's value type is legal in HW - // registers and the operand can be left unchanged. - bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo); - - // Convert Float Operand to Integer for Non-HW-supported Operations. + // Convert Float Operand to Integer. 
bool SoftenFloatOperand(SDNode *N, unsigned OpNo); SDValue SoftenFloatOp_BITCAST(SDNode *N); - SDValue SoftenFloatOp_COPY_TO_REG(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); - SDValue SoftenFloatOp_FABS(SDNode *N); - SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N); - SDValue SoftenFloatOp_FNEG(SDNode *N); SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); @@ -559,7 +539,6 @@ private: SDValue SoftenFloatOp_LLROUND(SDNode *N); SDValue SoftenFloatOp_LRINT(SDNode *N); SDValue SoftenFloatOp_LLRINT(SDNode *N); - SDValue SoftenFloatOp_SELECT(SDNode *N); SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); @@ -715,6 +694,7 @@ private: bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_BITCAST(SDNode *N); SDValue ScalarizeVecOp_UnaryOp(SDNode *N); + SDValue ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N); SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecOp_VSELECT(SDNode *N); @@ -830,6 +810,7 @@ private: SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); SDValue WidenVecRes_BinaryCanTrap(SDNode *N); + SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N); SDValue WidenVecRes_StrictFP(SDNode *N); SDValue WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo); SDValue WidenVecRes_Convert(SDNode *N); @@ -933,6 +914,8 @@ private: void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVSETCC(const SDNode *N); + //===--------------------------------------------------------------------===// // Generic Expansion: LegalizeTypesGeneric.cpp //===--------------------------------------------------------------------===// diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 943f63f46c47..5562f400b6e1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -52,17 +52,11 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypePromoteFloat: llvm_unreachable("Bitcast of a promotion-needing float should never need" "expansion"); - case TargetLowering::TypeSoftenFloat: { - // Expand the floating point operand only if it was converted to integers. - // Otherwise, it is a legal type like f128 that can be saved in a register. - auto SoftenedOp = GetSoftenedFloat(InOp); - if (isLegalInHWReg(SoftenedOp.getValueType())) - break; - SplitInteger(SoftenedOp, Lo, Hi); + case TargetLowering::TypeSoftenFloat: + SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; - } case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: { auto &DL = DAG.getDataLayout(); @@ -509,23 +503,6 @@ void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo, GetSplitOp(Op, Lo, Hi); } -static std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, - SelectionDAG &DAG) { - SDLoc DL(N); - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - - // Split the inputs. 
- SDValue Lo, Hi, LL, LH, RL, RH; - std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); - std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); - - Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); - Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); - - return std::make_pair(Lo, Hi); -} - void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LL, LH, RL, RH, CL, CH; SDLoc dl(N); @@ -537,16 +514,25 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) { if (Cond.getValueType().isVector()) { if (SDValue Res = WidenVSELECTAndMask(N)) std::tie(CL, CH) = DAG.SplitVector(Res->getOperand(0), dl); - // It seems to improve code to generate two narrow SETCCs as opposed to - // splitting a wide result vector. - else if (Cond.getOpcode() == ISD::SETCC) - std::tie(CL, CH) = SplitVSETCC(Cond.getNode(), DAG); // Check if there are already splitted versions of the vector available and // use those instead of splitting the mask operand again. else if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(Cond, CL, CH); - else + // It seems to improve code to generate two narrow SETCCs as opposed to + // splitting a wide result vector. + else if (Cond.getOpcode() == ISD::SETCC) { + // If the condition is a vXi1 vector, and the LHS of the setcc is a legal + // type and the setcc result type is the same vXi1, then leave the setcc + // alone. + EVT CondLHSVT = Cond.getOperand(0).getValueType(); + if (Cond.getValueType().getVectorElementType() == MVT::i1 && + isTypeLegal(CondLHSVT) && + getSetCCResultType(CondLHSVT) == Cond.getValueType()) + std::tie(CL, CH) = DAG.SplitVector(Cond, dl); + else + SplitVecRes_SETCC(Cond.getNode(), CL, CH); + } else std::tie(CL, CH) = DAG.SplitVector(Cond, dl); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 10b8b705869e..15c3a0b6cfad 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -38,6 +38,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" @@ -333,14 +334,27 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_ROUND: case ISD::STRICT_FP_EXTEND: - // These pseudo-ops get legalized as if they were their non-strict - // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT - // is also legal, but if ISD::FSQRT requires expansion then so does - // ISD::STRICT_FSQRT. - Action = TLI.getStrictFPOperationAction(Node->getOpcode(), - Node->getValueType(0)); + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + // If we're asked to expand a strict vector floating-point operation, + // by default we're going to simply unroll it. That is usually the + // best approach, except in the case where the resulting strict (scalar) + // operations would themselves use the fallback mutation to non-strict. + // In that specific case, just do the fallback on the vector op. 
+ if (Action == TargetLowering::Expand && + TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) { + EVT EltVT = Node->getValueType(0).getVectorElementType(); + if (TLI.getOperationAction(Node->getOpcode(), EltVT) + == TargetLowering::Expand && + TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT) + == TargetLowering::Legal) + Action = TargetLowering::Legal; + } break; case ISD::ADD: case ISD::SUB: @@ -439,16 +453,13 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { break; case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: { + case ISD::UMULFIX: + case ISD::UMULFIXSAT: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); break; } - case ISD::FP_ROUND_INREG: - Action = TLI.getOperationAction(Node->getOpcode(), - cast<VTSDNode>(Node->getOperand(1))->getVT()); - break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::VECREDUCE_ADD: @@ -820,6 +831,13 @@ SDValue VectorLegalizer::Expand(SDValue Op) { case ISD::SMULFIX: case ISD::UMULFIX: return ExpandFixedPointMul(Op); + case ISD::SMULFIXSAT: + case ISD::UMULFIXSAT: + // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly + // why. Maybe it results in worse codegen compared to the unroll for some + // targets? This should probably be investigated. And if we still prefer to + // unroll an explanation could be helpful. + return DAG.UnrollVectorOp(Op.getNode()); case ISD::STRICT_FADD: case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: @@ -844,6 +862,8 @@ SDValue VectorLegalizer::Expand(SDValue Op) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: return ExpandStrictFPOp(Op); case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: @@ -1168,9 +1188,13 @@ SDValue VectorLegalizer::ExpandABS(SDValue Op) { SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) { // Attempt to expand using TargetLowering. - SDValue Result; - if (TLI.expandFP_TO_UINT(Op.getNode(), Result, DAG)) + SDValue Result, Chain; + if (TLI.expandFP_TO_UINT(Op.getNode(), Result, Chain, DAG)) { + if (Op.getNode()->isStrictFPOpcode()) + // Relink the chain + DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Chain); return Result; + } // Otherwise go ahead and unroll. 
return DAG.UnrollVectorOp(Op.getNode()); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 7e4d52617977..3763e886cef2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -52,7 +52,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break; case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; - case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; @@ -171,6 +170,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_EXTEND: R = ScalarizeVecRes_StrictFPOp(N); break; @@ -185,6 +186,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: + case ISD::UMULFIXSAT: R = ScalarizeVecRes_MULFIX(N); break; } @@ -604,6 +606,10 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::UINT_TO_FP: Res = ScalarizeVecOp_UnaryOp(N); break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + Res = ScalarizeVecOp_UnaryOp_StrictFP(N); + break; case ISD::CONCAT_VECTORS: Res = ScalarizeVecOp_CONCAT_VECTORS(N); break; @@ -679,6 +685,23 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op); } +/// If the input is a vector that needs to be scalarized, it must be <1 x ty>. +/// Do the strict FP operation on the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) { + assert(N->getValueType(0).getVectorNumElements() == 1 && + "Unexpected vector type!"); + SDValue Elt = GetScalarizedVector(N->getOperand(1)); + SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), + { N->getValueType(0).getScalarType(), MVT::Other }, + { N->getOperand(0), Elt }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + // Revectorize the result so the types line up with what the uses of this + // expression expect. + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); +} + /// The vectors to concatenate have length one - use a BUILD_VECTOR instead. 
SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { SmallVector<SDValue, 8> Ops(N->getNumOperands()); @@ -828,7 +851,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; - case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; @@ -883,7 +905,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_ROUND: case ISD::STRICT_FP_ROUND: case ISD::FP_TO_SINT: + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FRINT: case ISD::FROUND: case ISD::FSIN: @@ -977,6 +1001,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: + case ISD::UMULFIXSAT: SplitVecRes_MULFIX(N, Lo, Hi); break; } @@ -1560,10 +1585,14 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, // Split Mask operand SDValue MaskLo, MaskHi; - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + } EVT MemoryVT = MLD->getMemoryVT(); EVT LoMemVT, HiMemVT; @@ -1622,10 +1651,14 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, // Split Mask operand SDValue MaskLo, MaskHi; - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + } EVT MemoryVT = MGT->getMemoryVT(); EVT LoMemVT, HiMemVT; @@ -1651,11 +1684,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, - MMO); + MMO, MGT->getIndexType()); SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, - MMO); + MMO, MGT->getIndexType()); // Build a factor node to remember that this load is independent of the // other one. 
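The gather hunks above make a pair of related changes: a mask that is itself a SETCC is now split by legalizing the SETCC directly (SplitVecRes_SETCC) rather than splitting its already-computed result, and each rebuilt half passes MGT->getIndexType() through getMaskedGather so the split nodes keep interpreting their index operand the way the original node did. The following standalone sketch illustrates what that ISD::MemIndexType distinguishes; the enumerator names are shortened here (in tree they are spelled SIGNED_SCALED and so on), and Scale stands for the scale operand the gather/scatter node already carries, so treat this as an illustration of the addressing contract rather than LLVM code.

#include <cstdint>

enum class MemIndexType { SignedScaled, SignedUnscaled,
                          UnsignedScaled, UnsignedUnscaled };

// Compute the address one lane of a gather/scatter touches, given how the
// index element is to be extended and whether it is pre-scaled.
uint64_t elementAddress(uint64_t Base, int32_t Index, uint64_t Scale,
                        MemIndexType Ty) {
  uint64_t SExt = static_cast<uint64_t>(static_cast<int64_t>(Index)); // sign-extend the index element
  uint64_t ZExt = static_cast<uint32_t>(Index);                       // zero-extend the index element
  switch (Ty) {
  case MemIndexType::SignedScaled:     return Base + SExt * Scale;
  case MemIndexType::SignedUnscaled:   return Base + SExt;
  case MemIndexType::UnsignedScaled:   return Base + ZExt * Scale;
  case MemIndexType::UnsignedUnscaled: return Base + ZExt;
  }
  return Base; // unreachable for the four values above
}

Threading the index type through the split is what keeps the Lo/Hi halves correct: if the original gather sign-extended and scaled its indices, both replacement gathers must do the same.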
@@ -1979,6 +2012,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::CTTZ: case ISD::CTLZ: case ISD::CTPOP: @@ -2293,7 +2328,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, - OpsLo, MMO); + OpsLo, MMO, MGT->getIndexType()); MMO = DAG.getMachineFunction(). getMachineMemOperand(MGT->getPointerInfo(), @@ -2303,7 +2338,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, - OpsHi, MMO); + OpsHi, MMO, MGT->getIndexType()); // Build a factor node to remember that this load is independent of the // other one. @@ -2340,12 +2375,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, else std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + // Split Mask operand SDValue MaskLo, MaskHi; - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - // Split Mask operand - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + } SDValue Lo, Hi; MachineMemOperand *MMO = DAG.getMachineFunction(). @@ -2397,12 +2436,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, else std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + // Split Mask operand SDValue MaskLo, MaskHi; - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - // Split Mask operand - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + } SDValue IndexHi, IndexLo; if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) @@ -2418,7 +2461,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), - DL, OpsLo, MMO); + DL, OpsLo, MMO, N->getIndexType()); MMO = DAG.getMachineFunction(). getMachineMemOperand(N->getPointerInfo(), @@ -2430,7 +2473,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, // after another. 
SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), - DL, OpsHi, MMO); + DL, OpsHi, MMO, N->getIndexType()); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -2596,7 +2639,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2)); HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2)); SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes); - return PromoteTargetBoolean(Con, N->getValueType(0)); + + EVT OpVT = N->getOperand(0).getValueType(); + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); + return DAG.getNode(ExtendCode, DL, N->getValueType(0), Con); } @@ -2663,7 +2710,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; - case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; @@ -2719,6 +2765,15 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_BinaryCanTrap(N); break; + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: + case ISD::UMULFIXSAT: + // These are binary operations, but with an extra operand that shouldn't + // be widened (the scale). + Res = WidenVecRes_BinaryWithExtraScalarOp(N); + break; + case ISD::STRICT_FADD: case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: @@ -2790,6 +2845,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FP_EXTEND: case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: Res = WidenVecRes_Convert_StrictFP(N); break; @@ -2866,6 +2923,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags()); } +SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) { + // Binary op widening, but with an extra operand that shouldn't be widened. + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + SDValue InOp3 = N->getOperand(2); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3, + N->getFlags()); +} + // Given a vector of operations that have been broken up to widen, see // if we can collect them together into the next widest legal VT. This // implementation is trap-safe. @@ -3716,7 +3784,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { Scale }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), N->getMemoryVT(), dl, Ops, - N->getMemOperand()); + N->getMemOperand(), N->getIndexType()); // Legalize the chain result - switch anything that used the old chain to // use the new one. 
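The SplitVecOp_VSETCC hunk just above (and the matching WidenVecOp_SETCC hunk further down) inlines PromoteTargetBoolean, and in doing so switches the getBooleanContents query from the setcc result type to the type of the operands being compared, which is what determines how the target encodes a vector true value. Both versions rest on the TargetLowering::getExtendForContent mapping; here is a minimal standalone paraphrase of it, with the enumerators renamed for brevity — an illustration, not the LLVM source.

#include <cstdlib>

enum class BooleanContent {
  Undefined,         // only bit 0 of a true/false value is meaningful
  ZeroOrOne,         // booleans are 0 or +1
  ZeroOrNegativeOne  // booleans are 0 or all-ones
};

enum class ExtendKind { AnyExtend, ZeroExtend, SignExtend };

// Pick the extension that widens a boolean without changing its meaning.
ExtendKind getExtendForContent(BooleanContent Content) {
  switch (Content) {
  case BooleanContent::Undefined:         return ExtendKind::AnyExtend;  // garbage high bits are fine
  case BooleanContent::ZeroOrOne:         return ExtendKind::ZeroExtend; // preserve 0 / +1
  case BooleanContent::ZeroOrNegativeOne: return ExtendKind::SignExtend; // preserve 0 / -1
  }
  std::abort(); // unreachable for the three values above
}

Whichever extend comes back is applied to the concatenated narrow SETCC results, producing a boolean vector with the layout the target expects.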
@@ -4094,7 +4162,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::FP_EXTEND: case ISD::STRICT_FP_EXTEND: case ISD::FP_TO_SINT: + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::TRUNCATE: @@ -4434,7 +4504,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) { SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index, Scale}; SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops, - MG->getMemOperand()); + MG->getMemOperand(), MG->getIndexType()); ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); ReplaceValueWith(SDValue(N, 0), Res.getValue(0)); return SDValue(); @@ -4472,7 +4542,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), SDLoc(N), Ops, - MSC->getMemOperand()); + MSC->getMemOperand(), MSC->getIndexType()); } SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { @@ -4504,7 +4574,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - return PromoteTargetBoolean(CC, VT); + EVT OpVT = N->getOperand(0).getValueType(); + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); + return DAG.getNode(ExtendCode, dl, VT, CC); } SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) { @@ -4706,7 +4779,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; - unsigned LdAlign = LD->isVolatile() ? 0 : Align; // Allow wider loads. + unsigned LdAlign = (!LD->isSimple()) ? 0 : Align; // Allow wider loads. // Find the vector type that can load from. EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 2cb850fa1a3d..7ee44c808fcb 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -498,7 +498,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, // Check for def of register or earlyclobber register. for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } else diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 34b4c8502353..ff806bdb822c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1188,6 +1188,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (!Pred.isArtificial()) AddPredQueued(NewSU, Pred); + // Make sure the clone comes after the original. (InstrEmitter assumes + // this ordering.) + AddPredQueued(NewSU, SDep(SU, SDep::Artificial)); + // Only copy scheduled successors. Cut them from old node's successor // list and move them over. SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; @@ -1374,7 +1378,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { // Check for def of register or earlyclobber register. 
for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); } } else @@ -2358,7 +2362,7 @@ static bool hasOnlyLiveInOpers(const SUnit *SU) { PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { unsigned Reg = cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { RetVal = true; continue; } @@ -2379,7 +2383,7 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) { if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { unsigned Reg = cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { RetVal = true; continue; } @@ -2948,8 +2952,8 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // like other nodes from the perspective of scheduling heuristics. if (SDNode *N = SU.getNode()) if (N->getOpcode() == ISD::CopyToReg && - TargetRegisterInfo::isVirtualRegister - (cast<RegisterSDNode>(N->getOperand(1))->getReg())) + Register::isVirtualRegister( + cast<RegisterSDNode>(N->getOperand(1))->getReg())) continue; SDNode *PredFrameSetup = nullptr; @@ -2995,8 +2999,8 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // like other nodes from the perspective of scheduling heuristics. if (SDNode *N = SU.getNode()) if (N->getOpcode() == ISD::CopyFromReg && - TargetRegisterInfo::isVirtualRegister - (cast<RegisterSDNode>(N->getOperand(1))->getReg())) + Register::isVirtualRegister( + cast<RegisterSDNode>(N->getOperand(1))->getReg())) continue; // Perform checks on the successors of PredSU. diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 568c6191e512..d4c1fb36475e 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -115,7 +115,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, return; unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return; unsigned ResNo = User->getOperand(2).getResNo(); @@ -528,7 +528,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// glued together nodes with a single SUnit. -void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { +void ScheduleDAGSDNodes::BuildSchedGraph(AAResults *AA) { // Cluster certain nodes which should be scheduled together. ClusterNodes(); // Populate the SUnits array. @@ -656,7 +656,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg && !BB->succ_empty()) { unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) // This copy is a liveout value. It is likely coalesced, so reduce the // latency so not to penalize the def. // FIXME: need target specific adjustment here? @@ -808,7 +808,7 @@ EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, } else { // Copy from physical register. 
assert(I->getReg() && "Unknown physical register!"); - unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); + Register VRBase = MRI.createVirtualRegister(SU->CopyDstRC); bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); @@ -909,6 +909,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Remember the source order of the inserted instruction. if (HasDbg) ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); + + if (MDNode *MD = DAG->getHeapAllocSite(N)) { + if (NewInsn && NewInsn->isCall()) + MF.addCodeViewHeapAllocSite(NewInsn, MD); + } + GluedNodes.pop_back(); } auto NewInsn = @@ -917,6 +923,10 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { if (HasDbg) ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); + if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) { + if (NewInsn && NewInsn->isCall()) + MF.addCodeViewHeapAllocSite(NewInsn, MD); + } } // Insert all the dbg_values which have not already been inserted in source diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 5163b4fa4fd3..183ce4b0652d 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -26,6 +26,7 @@ namespace llvm { +class AAResults; class InstrItineraryData; /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. @@ -93,7 +94,7 @@ class InstrItineraryData; /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. - void BuildSchedGraph(AliasAnalysis *AA); + void BuildSchedGraph(AAResults *AA); /// InitNumRegDefsLeft - Determine the # of regs defined by this node. /// diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index ab06b55b49fd..e7bac73678a7 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -63,14 +63,13 @@ private: /// HazardRec - The hazard recognizer to use. ScheduleHazardRecognizer *HazardRec; - /// AA - AliasAnalysis for making memory reference queries. - AliasAnalysis *AA; + /// AA - AAResults for making memory reference queries. 
+ AAResults *AA; public: - ScheduleDAGVLIW(MachineFunction &mf, - AliasAnalysis *aa, + ScheduleDAGVLIW(MachineFunction &mf, AAResults *aa, SchedulingPriorityQueue *availqueue) - : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { + : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { const TargetSubtargetInfo &STI = mf.getSubtarget(); HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 5852e693fa9f..52a71b91d93f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -859,9 +859,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { break; case ISD::TargetExternalSymbol: { ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N); - Erased = TargetExternalSymbols.erase( - std::pair<std::string,unsigned char>(ESN->getSymbol(), - ESN->getTargetFlags())); + Erased = TargetExternalSymbols.erase(std::pair<std::string, unsigned>( + ESN->getSymbol(), ESN->getTargetFlags())); break; } case ISD::MCSymbol: { @@ -1084,6 +1083,7 @@ void SelectionDAG::clear() { ExternalSymbols.clear(); TargetExternalSymbols.clear(); MCSymbols.clear(); + SDCallSiteDbgInfo.clear(); std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), static_cast<CondCodeSDNode*>(nullptr)); std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), @@ -1353,7 +1353,7 @@ SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT, SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t Offset, bool isTargetGA, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTargetGA) && "Cannot set target flags on target-independent globals"); @@ -1400,7 +1400,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { } SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent jump tables"); unsigned Opc = isTarget ? 
ISD::TargetJumpTable : ISD::JumpTable; @@ -1421,7 +1421,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, unsigned Alignment, int Offset, bool isTarget, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) @@ -1449,7 +1449,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, unsigned Alignment, int Offset, bool isTarget, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) @@ -1473,7 +1473,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, } SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, - unsigned char TargetFlags) { + unsigned TargetFlags) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None); ID.AddInteger(Index); @@ -1535,10 +1535,9 @@ SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) { } SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, - unsigned char TargetFlags) { + unsigned TargetFlags) { SDNode *&N = - TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym, - TargetFlags)]; + TargetExternalSymbols[std::pair<std::string, unsigned>(Sym, TargetFlags)]; if (N) return SDValue(N, 0); N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, VT); InsertNode(N); @@ -1802,9 +1801,8 @@ SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl, } SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, - int64_t Offset, - bool isTarget, - unsigned char TargetFlags) { + int64_t Offset, bool isTarget, + unsigned TargetFlags) { unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress; FoldingSetNodeID ID; @@ -1900,20 +1898,19 @@ SDValue SelectionDAG::expandVAArg(SDNode *Node) { EVT VT = Node->getValueType(0); SDValue Tmp1 = Node->getOperand(0); SDValue Tmp2 = Node->getOperand(1); - unsigned Align = Node->getConstantOperandVal(3); + const MaybeAlign MA(Node->getConstantOperandVal(3)); SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, Tmp2, MachinePointerInfo(V)); SDValue VAList = VAListLoad; - if (Align > TLI.getMinStackArgumentAlignment()) { - assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); - + if (MA && *MA > TLI.getMinStackArgumentAlignment()) { VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, - getConstant(Align - 1, dl, VAList.getValueType())); + getConstant(MA->value() - 1, dl, VAList.getValueType())); - VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList, - getConstant(-(int64_t)Align, dl, VAList.getValueType())); + VAList = + getNode(ISD::AND, dl, VAList.getValueType(), VAList, + getConstant(-(int64_t)MA->value(), dl, VAList.getValueType())); } // Increment the pointer, VAList, to the next vaarg @@ -2154,12 +2151,9 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, } case ISD::OR: case ISD::XOR: - // If the LHS or RHS don't contribute bits to the or, drop them. 
- if (MaskedValueIsZero(V.getOperand(0), DemandedBits)) - return V.getOperand(1); - if (MaskedValueIsZero(V.getOperand(1), DemandedBits)) - return V.getOperand(0); - break; + case ISD::SIGN_EXTEND_INREG: + return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts, + *this, 0); case ISD::SRL: // Only look at single-use SRLs. if (!V.getNode()->hasOneUse()) @@ -2203,15 +2197,6 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc); break; } - case ISD::SIGN_EXTEND_INREG: - EVT ExVT = cast<VTSDNode>(V.getOperand(1))->getVT(); - unsigned ExVTBits = ExVT.getScalarSizeInBits(); - - // If none of the extended bits are demanded, eliminate the sextinreg. - if (DemandedBits.getActiveBits() <= ExVTBits) - return V.getOperand(0); - - break; } return SDValue(); } @@ -2395,15 +2380,39 @@ SDValue SelectionDAG::getSplatValue(SDValue V) { /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that /// is less than the element bit-width of the shift node, return it. static const APInt *getValidShiftAmountConstant(SDValue V) { + unsigned BitWidth = V.getScalarValueSizeInBits(); if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) { // Shifting more than the bitwidth is not valid. const APInt &ShAmt = SA->getAPIntValue(); - if (ShAmt.ult(V.getScalarValueSizeInBits())) + if (ShAmt.ult(BitWidth)) return &ShAmt; } return nullptr; } +/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less +/// than the element bit-width of the shift node, return the minimum value. +static const APInt *getValidMinimumShiftAmountConstant(SDValue V) { + unsigned BitWidth = V.getScalarValueSizeInBits(); + auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1)); + if (!BV) + return nullptr; + const APInt *MinShAmt = nullptr; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i)); + if (!SA) + return nullptr; + // Shifting more than the bitwidth is not valid. + const APInt &ShAmt = SA->getAPIntValue(); + if (ShAmt.uge(BitWidth)) + return nullptr; + if (MinShAmt && MinShAmt->ule(ShAmt)) + continue; + MinShAmt = &ShAmt; + } + return MinShAmt; +} + /// Determine which bits of Op are known to be either zero or one and return /// them in Known. For vectors, the known bits are those that are shared by /// every vector element. @@ -2437,7 +2446,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, return Known; } - if (Depth == 6) + if (Depth >= MaxRecursionDepth) return Known; // Limit search depth. KnownBits Known2; @@ -2582,14 +2591,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, SDValue Src = Op.getOperand(0); ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts); if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { // Offset the demanded elts by the subvector index. 
uint64_t Idx = SubIdx->getZExtValue(); - APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - Known = computeKnownBits(Src, DemandedSrc, Depth + 1); - } else { - Known = computeKnownBits(Src, Depth + 1); + DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); } + Known = computeKnownBits(Src, DemandedSrc, Depth + 1); break; } case ISD::SCALAR_TO_VECTOR: { @@ -2800,25 +2808,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.One.lshrInPlace(Shift); // High bits are known zero. Known.Zero.setHighBits(Shift); - } else if (auto *BV = dyn_cast<BuildVectorSDNode>(Op.getOperand(1))) { - // If the shift amount is a vector of constants see if we can bound - // the number of upper zero bits. - unsigned ShiftAmountMin = BitWidth; - for (unsigned i = 0; i != BV->getNumOperands(); ++i) { - if (auto *C = dyn_cast<ConstantSDNode>(BV->getOperand(i))) { - const APInt &ShAmt = C->getAPIntValue(); - if (ShAmt.ult(BitWidth)) { - ShiftAmountMin = std::min<unsigned>(ShiftAmountMin, - ShAmt.getZExtValue()); - continue; - } - } - // Don't know anything. - ShiftAmountMin = 0; - break; - } - - Known.Zero.setHighBits(ShiftAmountMin); + } else if (const APInt *ShMinAmt = getValidMinimumShiftAmountConstant(Op)) { + // Minimum shift high bits are known zero. + Known.Zero.setHighBits(ShMinAmt->getZExtValue()); } break; case ISD::SRA: @@ -3105,12 +3097,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, // If the first operand is non-negative or has all low bits zero, then // the upper bits are all zero. - if (Known2.Zero[BitWidth-1] || ((Known2.Zero & LowBits) == LowBits)) + if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero)) Known.Zero |= ~LowBits; // If the first operand is negative and not all low bits are zero, then // the upper bits are all one. - if (Known2.One[BitWidth-1] && ((Known2.One & LowBits) != 0)) + if (Known2.isNegative() && LowBits.intersects(Known2.One)) Known.One |= ~LowBits; assert((Known.Zero & Known.One) == 0&&"Bits known to be one AND zero?"); } @@ -3427,7 +3419,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return Val.getNumSignBits(); } - if (Depth == 6) + if (Depth >= MaxRecursionDepth) return 1; // Limit search depth. if (!DemandedElts) @@ -3729,6 +3721,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); if (Tmp == 1) return 1; // Early out. return std::min(Tmp, Tmp2)-1; + case ISD::MUL: { + // The output of the Mul can be at most twice the valid bits in the inputs. + unsigned SignBitsOp0 = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + if (SignBitsOp0 == 1) + break; + unsigned SignBitsOp1 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); + if (SignBitsOp1 == 1) + break; + unsigned OutValidBits = + (VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1); + return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1; + } case ISD::TRUNCATE: { // Check if the sign bits of source go down as far as the truncated value. 
unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits(); @@ -3817,13 +3821,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, SDValue Src = Op.getOperand(0); ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts); if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { // Offset the demanded elts by the subvector index. uint64_t Idx = SubIdx->getZExtValue(); - APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - return ComputeNumSignBits(Src, DemandedSrc, Depth + 1); + DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); } - return ComputeNumSignBits(Src, Depth + 1); + return ComputeNumSignBits(Src, DemandedSrc, Depth + 1); } case ISD::CONCAT_VECTORS: { // Determine the minimum number of sign bits across all demanded @@ -3976,7 +3980,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs()) return true; - if (Depth == 6) + if (Depth >= MaxRecursionDepth) return false; // Limit search depth. // TODO: Handle vectors. @@ -4645,7 +4649,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return getUNDEF(VT); // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) && + if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) && OpOpcode == ISD::FSUB) return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), Operand.getOperand(0), Flags); @@ -5156,22 +5160,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (N2C && N2C->isNullValue()) return N1; break; - case ISD::FP_ROUND_INREG: { - EVT EVT = cast<VTSDNode>(N2)->getVT(); - assert(VT == N1.getValueType() && "Not an inreg round!"); - assert(VT.isFloatingPoint() && EVT.isFloatingPoint() && - "Cannot FP_ROUND_INREG integer types"); - assert(EVT.isVector() == VT.isVector() && - "FP_ROUND_INREG type should be vector iff the operand " - "type is vector!"); - assert((!EVT.isVector() || - EVT.getVectorNumElements() == VT.getVectorNumElements()) && - "Vector element counts must match in FP_ROUND_INREG"); - assert(EVT.bitsLE(VT) && "Not rounding down!"); - (void)EVT; - if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding. 
- break; - } case ISD::FP_ROUND: assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() && @@ -5382,7 +5370,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, std::swap(N1, N2); } else { switch (Opcode) { - case ISD::FP_ROUND_INREG: case ISD::SIGN_EXTEND_INREG: case ISD::SUB: return getUNDEF(VT); // fold op(undef, arg2) -> undef @@ -5770,7 +5757,7 @@ static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - uint64_t Size, unsigned Align, + uint64_t Size, unsigned Alignment, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { @@ -5795,15 +5782,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; unsigned SrcAlign = DAG.InferPtrAlignment(Src); - if (Align > SrcAlign) - SrcAlign = Align; + if (Alignment > SrcAlign) + SrcAlign = Alignment; ConstantDataArraySlice Slice; bool CopyFromConstant = isMemSrcFromConstant(Src, Slice); bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); if (!TLI.findOptimalMemOpLowering( - MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), + MemOps, Limit, Size, (DstAlignCanChange ? 0 : Alignment), (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant, /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), @@ -5818,15 +5805,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, // realignment. const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) - while (NewAlign > Align && - DL.exceedsNaturalStackAlignment(NewAlign)) - NewAlign /= 2; + while (NewAlign > Alignment && + DL.exceedsNaturalStackAlignment(Align(NewAlign))) + NewAlign /= 2; - if (NewAlign > Align) { + if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed. 
if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) MFI.setObjectAlignment(FI->getIndex(), NewAlign); - Align = NewAlign; + Alignment = NewAlign; } } @@ -5869,10 +5856,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, } Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice); if (Value.getNode()) { - Store = DAG.getStore(Chain, dl, Value, - DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Align, - MMOFlags); + Store = DAG.getStore( + Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), + DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags); OutChains.push_back(Store); } } @@ -5900,7 +5886,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, Store = DAG.getTruncStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags); + DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags); OutStoreChains.push_back(Store); } SrcOff += VTSize; @@ -6567,7 +6553,7 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) { SDValue SelectionDAG::getMemIntrinsicNode( unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, - MachineMemOperand::Flags Flags, unsigned Size, const AAMDNodes &AAInfo) { + MachineMemOperand::Flags Flags, uint64_t Size, const AAMDNodes &AAInfo) { if (Align == 0) // Ensure that codegen never sees alignment 0 Align = getEVTAlignment(MemVT); @@ -6619,7 +6605,9 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, createOperands(N, Ops); } InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl, @@ -7022,14 +7010,15 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, - MachineMemOperand *MMO) { + MachineMemOperand *MMO, + ISD::MemIndexType IndexType) { assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); ID.AddInteger(VT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>( - dl.getIROrder(), VTs, VT, MMO)); + dl.getIROrder(), VTs, VT, MMO, IndexType)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -7038,7 +7027,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, } auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), - VTs, VT, MMO); + VTs, VT, MMO, IndexType); createOperands(N, Ops); assert(N->getPassThru().getValueType() == N->getValueType(0) && @@ -7062,14 +7051,15 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, - MachineMemOperand *MMO) { + MachineMemOperand *MMO, + ISD::MemIndexType IndexType) { assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); ID.AddInteger(VT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>( - dl.getIROrder(), VTs, VT, MMO)); + dl.getIROrder(), VTs, VT, MMO, IndexType)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = 
FindNodeOrInsertPos(ID, dl, IP)) { @@ -7077,7 +7067,7 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, return SDValue(E, 0); } auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), - VTs, VT, MMO); + VTs, VT, MMO, IndexType); createOperands(N, Ops); assert(N->getMask().getValueType().getVectorNumElements() == @@ -7766,16 +7756,22 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break; case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break; case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break; + case ISD::STRICT_LRINT: NewOpc = ISD::LRINT; break; + case ISD::STRICT_LLRINT: NewOpc = ISD::LLRINT; break; case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break; case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break; case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break; case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break; case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break; case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break; + case ISD::STRICT_LROUND: NewOpc = ISD::LROUND; break; + case ISD::STRICT_LLROUND: NewOpc = ISD::LLROUND; break; case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break; case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break; case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break; case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break; + case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break; + case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break; } assert(Node->getNumValues() == 2 && "Unexpected number of results!"); @@ -7925,6 +7921,7 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL, CSEMap.InsertNode(N, IP); InsertNode(N); + NewSDValueDbgMsg(SDValue(N, 0), "Creating new machine node: ", this); return N; } @@ -8619,7 +8616,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, // TokenFactor. SDValue OldChain = SDValue(OldLoad, 1); SDValue NewChain = SDValue(NewMemOp.getNode(), 1); - if (!OldLoad->hasAnyUseOfValue(1)) + if (OldChain == NewChain || !OldLoad->hasAnyUseOfValue(1)) return NewChain; SDValue TokenFactor = @@ -8812,7 +8809,7 @@ HandleSDNode::~HandleSDNode() { GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, const GlobalValue *GA, EVT VT, - int64_t o, unsigned char TF) + int64_t o, unsigned TF) : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { TheGlobal = GA; } @@ -8986,7 +8983,7 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, // Loads don't have side effects, look through them. if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) { - if (!Ld->isVolatile()) + if (Ld->isUnordered()) return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1); } return false; @@ -9005,21 +9002,51 @@ void SDNode::intersectFlagsWith(const SDNodeFlags Flags) { SDValue SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, - ArrayRef<ISD::NodeType> CandidateBinOps) { + ArrayRef<ISD::NodeType> CandidateBinOps, + bool AllowPartials) { // The pattern must end in an extract from index 0. if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isNullConstant(Extract->getOperand(1))) return SDValue(); - SDValue Op = Extract->getOperand(0); - unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements()); - // Match against one of the candidate binary ops. 
+ SDValue Op = Extract->getOperand(0); if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) { return Op.getOpcode() == unsigned(BinOp); })) return SDValue(); + // Floating-point reductions may require relaxed constraints on the final step + // of the reduction because they may reorder intermediate operations. + unsigned CandidateBinOp = Op.getOpcode(); + if (Op.getValueType().isFloatingPoint()) { + SDNodeFlags Flags = Op->getFlags(); + switch (CandidateBinOp) { + case ISD::FADD: + if (!Flags.hasNoSignedZeros() || !Flags.hasAllowReassociation()) + return SDValue(); + break; + default: + llvm_unreachable("Unhandled FP opcode for binop reduction"); + } + } + + // Matching failed - attempt to see if we did enough stages that a partial + // reduction from a subvector is possible. + auto PartialReduction = [&](SDValue Op, unsigned NumSubElts) { + if (!AllowPartials || !Op) + return SDValue(); + EVT OpVT = Op.getValueType(); + EVT OpSVT = OpVT.getScalarType(); + EVT SubVT = EVT::getVectorVT(*getContext(), OpSVT, NumSubElts); + if (!TLI->isExtractSubvectorCheap(SubVT, OpVT, 0)) + return SDValue(); + BinOp = (ISD::NodeType)CandidateBinOp; + return getNode( + ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op, + getConstant(0, SDLoc(Op), TLI->getVectorIdxTy(getDataLayout()))); + }; + // At each stage, we're looking for something that looks like: // %s = shufflevector <8 x i32> %op, <8 x i32> undef, // <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, @@ -9030,10 +9057,16 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, // <4,5,6,7,u,u,u,u> // <2,3,u,u,u,u,u,u> // <1,u,u,u,u,u,u,u> - unsigned CandidateBinOp = Op.getOpcode(); + // While a partial reduction match would be: + // <2,3,u,u,u,u,u,u> + // <1,u,u,u,u,u,u,u> + unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements()); + SDValue PrevOp; for (unsigned i = 0; i < Stages; ++i) { + unsigned MaskEnd = (1 << i); + if (Op.getOpcode() != CandidateBinOp) - return SDValue(); + return PartialReduction(PrevOp, MaskEnd); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -9049,12 +9082,14 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, // The first operand of the shuffle should be the same as the other operand // of the binop. if (!Shuffle || Shuffle->getOperand(0) != Op) - return SDValue(); + return PartialReduction(PrevOp, MaskEnd); // Verify the shuffle has the expected (at this stage of the pyramid) mask. 
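// --- Sketch (editor's illustration, not part of this commit): why the
// FADD case above insists on the 'reassoc' and 'nsz' fast-math flags.
// The pyramid reorders the additions, and FP addition is not associative,
// so the matcher may only fire when the IR explicitly allowed reordering:
#include <cstdio>
int main() {
  float A = 1e20f, B = -1e20f, C = 1.0f;
  std::printf("%g vs %g\n", (A + B) + C, A + (B + C)); // prints "1 vs 0"
}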
- for (int Index = 0, MaskEnd = 1 << i; Index < MaskEnd; ++Index) - if (Shuffle->getMaskElt(Index) != MaskEnd + Index) - return SDValue(); + for (int Index = 0; Index < (int)MaskEnd; ++Index) + if (Shuffle->getMaskElt(Index) != (int)(MaskEnd + Index)) + return PartialReduction(PrevOp, MaskEnd); + + PrevOp = Op; } BinOp = (ISD::NodeType)CandidateBinOp; @@ -9114,8 +9149,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { getShiftAmountOperand(Operands[0].getValueType(), Operands[1]))); break; - case ISD::SIGN_EXTEND_INREG: - case ISD::FP_ROUND_INREG: { + case ISD::SIGN_EXTEND_INREG: { EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType(); Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], @@ -9187,6 +9221,9 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, int Dist) const { if (LD->isVolatile() || Base->isVolatile()) return false; + // TODO: probably too restrictive for atomics, revisit + if (!LD->isSimple()) + return false; if (LD->isIndexed() || Base->isIndexed()) return false; if (LD->getChain() != Base->getChain()) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index 9592bc30a4e1..3a53ab9717a4 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" #include <cstdint> using namespace llvm; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e818dd27c05e..8c15563fcd23 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -833,7 +833,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // If the source register was virtual and if we know something about it, // add an assert node. - if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) || + if (!Register::isVirtualRegister(Regs[Part + i]) || !RegisterVT.isInteger()) continue; @@ -948,8 +948,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); if (HasMatching) Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); - else if (!Regs.empty() && - TargetRegisterInfo::isVirtualRegister(Regs.front())) { + else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) { // Put the register class of the virtual registers in the flag word. That // way, later passes can recompute register class constraints for inline // assembly as well as normal instructions. @@ -1810,7 +1809,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // offsets to its parts don't wrap either. 
SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]); - SDValue Val = RetOp.getValue(i); + SDValue Val = RetOp.getValue(RetOp.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]); Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val, @@ -2263,7 +2262,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { Instruction::BinaryOps Opcode = BOp->getOpcode(); if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && - !I.getMetadata(LLVMContext::MD_unpredictable) && + !I.hasMetadata(LLVMContext::MD_unpredictable) && (Opcode == Instruction::And || Opcode == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode, @@ -2600,9 +2599,11 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, void SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setDiscardResult(true); SDValue Chain = TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, - None, false, getCurSDLoc(), false, false).second; + None, CallOptions, getCurSDLoc()).second; // On PS4, the "return address" must still be within the calling function, // even if it's at the very end, so emit an explicit TRAP here. // Passing 'true' for doesNotReturn above won't generate the trap for us. @@ -2618,24 +2619,18 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB) { SDLoc dl = getCurSDLoc(); - // Subtract the minimum value + // Subtract the minimum value. SDValue SwitchOp = getValue(B.SValue); EVT VT = SwitchOp.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, - DAG.getConstant(B.First, dl, VT)); - - // Check range - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue RangeCmp = DAG.getSetCC( - dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT); + SDValue RangeSub = + DAG.getNode(ISD::SUB, dl, VT, SwitchOp, DAG.getConstant(B.First, dl, VT)); // Determine the type of the test operands. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); bool UsePtrType = false; - if (!TLI.isTypeLegal(VT)) + if (!TLI.isTypeLegal(VT)) { UsePtrType = true; - else { + } else { for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) { // Switch table case range are encoded into series of masks. @@ -2644,6 +2639,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, break; } } + SDValue Sub = RangeSub; if (UsePtrType) { VT = TLI.getPointerTy(DAG.getDataLayout()); Sub = DAG.getZExtOrTrunc(Sub, dl, VT); @@ -2655,20 +2651,29 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock* MBB = B.Cases[0].ThisBB; - addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); + if (!B.OmitRangeCheck) + addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); addSuccessorWithProb(SwitchBB, MBB, B.Prob); SwitchBB->normalizeSuccProbs(); - SDValue BrRange = DAG.getNode(ISD::BRCOND, dl, - MVT::Other, CopyTo, RangeCmp, - DAG.getBasicBlock(B.Default)); + SDValue Root = CopyTo; + if (!B.OmitRangeCheck) { + // Conditional branch to the default block. 
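// --- Sketch (editor's illustration, not part of this commit): the trick
// behind the bit-test range check reshuffled above. Subtracting the
// smallest case value lets one unsigned compare (the SETUGT to the default
// block) cover both bounds, since any value below B.First wraps around to
// a huge unsigned number; when the default destination is unreachable,
// OmitRangeCheck drops the compare and branch entirely:
static bool inRange(unsigned X, unsigned First, unsigned Range) {
  return X - First <= Range; // one unsigned compare; X < First wraps high
}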
+ SDValue RangeCmp = DAG.getSetCC(dl, + TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + RangeSub.getValueType()), + RangeSub, DAG.getConstant(B.Range, dl, RangeSub.getValueType()), + ISD::SETUGT); + + Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp, + DAG.getBasicBlock(B.Default)); + } // Avoid emitting unnecessary branches to the next block. if (MBB != NextBlock(SwitchBB)) - BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange, - DAG.getBasicBlock(MBB)); + Root = DAG.getNode(ISD::BR, dl, MVT::Other, Root, DAG.getBasicBlock(MBB)); - DAG.setRoot(BrRange); + DAG.setRoot(Root); } /// visitBitTestCase - this function produces one "bit test" @@ -3266,8 +3271,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { // We care about the legality of the operation after it has been type // legalized. - while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal && - VT != TLI.getTypeToTransformTo(Ctx, VT)) + while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal) VT = TLI.getTypeToTransformTo(Ctx, VT); // If the vselect is legal, assume we want to leave this as a vector setcc + @@ -3534,17 +3538,32 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); + Constant *MaskV = cast<Constant>(I.getOperand(2)); SDLoc DL = getCurSDLoc(); - - SmallVector<int, 8> Mask; - ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); - unsigned MaskNumElts = Mask.size(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); + if (MaskV->isNullValue() && VT.isScalableVector()) { + // Canonical splat form of first element of first input vector. + SDValue FirstElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + SrcVT.getScalarType(), Src1, + DAG.getConstant(0, DL, + TLI.getVectorIdxTy(DAG.getDataLayout()))); + setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt)); + return; + } + + // For now, we only handle splats for scalable vectors. + // The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation + // for targets that support a SPLAT_VECTOR for non-scalable vector types. + assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle"); + + SmallVector<int, 8> Mask; + ShuffleVectorInst::getShuffleMask(MaskV, Mask); + unsigned MaskNumElts = Mask.size(); + if (SrcNumElts == MaskNumElts) { setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask)); return; @@ -3825,7 +3844,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // Normalize Vector GEP - all scalar operands should be converted to the // splat vector. unsigned VectorWidth = I.getType()->isVectorTy() ? - cast<VectorType>(I.getType())->getVectorNumElements() : 0; + I.getType()->getVectorNumElements() : 0; if (VectorWidth && !N.getValueType().isVector()) { LLVMContext &Context = *DAG.getContext(); @@ -3858,12 +3877,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // If this is a scalar constant or a splat vector of constants, // handle it quickly. 
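// --- Sketch (editor's illustration, not part of this commit): the pattern
// canonicalised in visitShuffleVector above. A shufflevector with an
// all-zero mask broadcasts lane 0 of its first operand; scalable vectors
// have no fixed-length BUILD_VECTOR, so the builder emits
// EXTRACT_VECTOR_ELT(Src1, 0) followed by SPLAT_VECTOR instead.
// Fixed-length C++ analogue, names illustrative:
static void splatFirstLane(const float *Src, float *Dst, unsigned N) {
  float Elt = Src[0];          // extractelement %src, i64 0
  for (unsigned I = 0; I != N; ++I)
    Dst[I] = Elt;              // splat_vector
}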
- const auto *CI = dyn_cast<ConstantInt>(Idx); - if (!CI && isa<ConstantDataVector>(Idx) && - cast<ConstantDataVector>(Idx)->getSplatValue()) - CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue()); + const auto *C = dyn_cast<Constant>(Idx); + if (C && isa<VectorType>(C->getType())) + C = C->getSplatValue(); - if (CI) { + if (const auto *CI = dyn_cast_or_null<ConstantInt>(C)) { if (CI->isZero()) continue; APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize); @@ -3872,7 +3890,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) : DAG.getConstant(Offs, dl, IdxTy); - // In an inbouds GEP with an offset that is nonnegative even when + // In an inbounds GEP with an offset that is nonnegative even when // interpreted as signed, assume there is no unsigned overflow. SDNodeFlags Flags; if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds()) @@ -4002,8 +4020,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Type *Ty = I.getType(); bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; - bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr; + bool isNonTemporal = I.hasMetadata(LLVMContext::MD_nontemporal); + bool isInvariant = I.hasMetadata(LLVMContext::MD_invariant_load); bool isDereferenceable = isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout()); unsigned Alignment = I.getAlignment(); @@ -4118,7 +4136,7 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { SDValue Src = getValue(SrcV); // Create a virtual register, then update the virtual register. - unsigned VReg = + Register VReg = SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand()); // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue // Chain can be getRoot or getControlRoot. @@ -4132,8 +4150,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { "call visitLoadFromSwiftError when backend supports swifterror"); assert(!I.isVolatile() && - I.getMetadata(LLVMContext::MD_nontemporal) == nullptr && - I.getMetadata(LLVMContext::MD_invariant_load) == nullptr && + !I.hasMetadata(LLVMContext::MD_nontemporal) && + !I.hasMetadata(LLVMContext::MD_invariant_load) && "Support volatile, non temporal, invariant for load_from_swift_error"); const Value *SV = I.getOperand(0); @@ -4209,7 +4227,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { auto MMOFlags = MachineMemOperand::MONone; if (I.isVolatile()) MMOFlags |= MachineMemOperand::MOVolatile; - if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr) + if (I.hasMetadata(LLVMContext::MD_nontemporal)) MMOFlags |= MachineMemOperand::MONonTemporal; MMOFlags |= TLI.getMMOFlags(I); @@ -4309,8 +4327,9 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // are looking for. If first operand of the GEP is a splat vector - we // extract the splat value and use it as a uniform base. // In all other cases the function returns 'false'. 
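// --- Sketch (editor's illustration, not part of this commit): the address
// form getUniformBase is factoring out of a vector GEP. A gather/scatter
// lane address of the shape Base + Index[i] * Scale needs one scalar Base,
// so the splat base pointer (and, with this change, splat-constant middle
// indices) must be peeled off the GEP. Scalar analogue, names illustrative:
static void lanePointers(char *Base, const long *Index, long Scale,
                         void **Out, unsigned NumLanes) {
  for (unsigned I = 0; I != NumLanes; ++I)
    Out[I] = Base + Index[I] * Scale; // Base uniform, Index per lane
}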
-static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, - SDValue &Scale, SelectionDAGBuilder* SDB) { +static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index, + ISD::MemIndexType &IndexType, SDValue &Scale, + SelectionDAGBuilder *SDB) { SelectionDAG& DAG = SDB->DAG; LLVMContext &Context = *DAG.getContext(); @@ -4330,8 +4349,13 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, // Ensure all the other indices are 0. for (unsigned i = 1; i < FinalIndex; ++i) { - auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i)); - if (!C || !C->isZero()) + auto *C = dyn_cast<Constant>(GEP->getOperand(i)); + if (!C) + return false; + if (isa<VectorType>(C->getType())) + C = C->getSplatValue(); + auto *CI = dyn_cast_or_null<ConstantInt>(C); + if (!CI || !CI->isZero()) return false; } @@ -4346,6 +4370,7 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); Base = SDB->getValue(Ptr); Index = SDB->getValue(IndexVal); + IndexType = ISD::SIGNED_SCALED; if (!Index.getValueType().isVector()) { unsigned GEPWidth = GEP->getType()->getVectorNumElements(); @@ -4373,9 +4398,11 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDValue Base; SDValue Index; + ISD::MemIndexType IndexType; SDValue Scale; const Value *BasePtr = Ptr; - bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this); + bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale, + this); const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; MachineMemOperand *MMO = DAG.getMachineFunction(). @@ -4385,11 +4412,12 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); + IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale }; SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, - Ops, MMO); + Ops, MMO, IndexType); DAG.setRoot(Scatter); setValue(&I, Scatter); } @@ -4476,9 +4504,11 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDValue Root = DAG.getRoot(); SDValue Base; SDValue Index; + ISD::MemIndexType IndexType; SDValue Scale; const Value *BasePtr = Ptr; - bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this); + bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale, + this); bool ConstantMemory = false; if (UniformBase && AA && AA->pointsToConstantMemory( @@ -4500,11 +4530,12 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); + IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale }; SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, - Ops, MMO); + Ops, MMO, IndexType); SDValue OutChain = Gather.getValue(1); if (!ConstantMemory) @@ -4628,7 +4659,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { auto Flags = MachineMemOperand::MOLoad; if (I.isVolatile()) Flags |= MachineMemOperand::MOVolatile; - if (I.getMetadata(LLVMContext::MD_invariant_load) != nullptr) + if (I.hasMetadata(LLVMContext::MD_invariant_load)) Flags |= MachineMemOperand::MOInvariant; if 
(isDereferenceablePointer(I.getPointerOperand(), I.getType(), DAG.getDataLayout())) @@ -4645,9 +4676,27 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { AAMDNodes(), nullptr, SSID, Order); InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); - SDValue L = - DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain, - getValue(I.getPointerOperand()), MMO); + + SDValue Ptr = getValue(I.getPointerOperand()); + + if (TLI.lowerAtomicLoadAsLoadSDNode(I)) { + // TODO: Once this is better exercised by tests, it should be merged with + // the normal path for loads to prevent future divergence. + SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO); + if (MemVT != VT) + L = DAG.getPtrExtOrTrunc(L, dl, VT); + + setValue(&I, L); + SDValue OutChain = L.getValue(1); + if (!I.isUnordered()) + DAG.setRoot(OutChain); + else + PendingLoads.push_back(OutChain); + return; + } + + SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain, + Ptr, MMO); SDValue OutChain = L.getValue(1); if (MemVT != VT) @@ -4686,9 +4735,17 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDValue Val = getValue(I.getValueOperand()); if (Val.getValueType() != MemVT) Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT); + SDValue Ptr = getValue(I.getPointerOperand()); + if (TLI.lowerAtomicStoreAsStoreSDNode(I)) { + // TODO: Once this is better exercised by tests, it should be merged with + // the normal path for stores to prevent future divergence. + SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO); + DAG.setRoot(S); + return; + } SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain, - getValue(I.getPointerOperand()), Val, MMO); + Ptr, Val, MMO); DAG.setRoot(OutChain); @@ -4731,8 +4788,22 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { - SDValue Op = getValue(I.getArgOperand(i)); - Ops.push_back(Op); + const Value *Arg = I.getArgOperand(i); + if (!I.paramHasAttr(i, Attribute::ImmArg)) { + Ops.push_back(getValue(Arg)); + continue; + } + + // Use TargetConstant instead of a regular constant for immarg. + EVT VT = TLI.getValueType(*DL, Arg->getType(), true); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) { + assert(CI->getBitWidth() <= 64 && + "large intrinsic immediates not handled"); + Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT)); + } else { + Ops.push_back( + DAG.getTargetConstantFP(*cast<ConstantFP>(Arg), SDLoc(), VT)); + } } SmallVector<EVT, 4> ValueVTs; @@ -4749,10 +4820,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // This is target intrinsic that touches memory AAMDNodes AAInfo; I.getAAMetadata(AAInfo); - Result = - DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, - MachinePointerInfo(Info.ptrVal, Info.offset), - Info.align, Info.flags, Info.size, AAInfo); + Result = DAG.getMemIntrinsicNode( + Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, + MachinePointerInfo(Info.ptrVal, Info.offset), + Info.align ? 
Info.align->value() : 0, Info.flags, Info.size, AAInfo); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { @@ -4918,12 +4989,11 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, // Put the exponent in the right bit position for later addition to the // final result: // - // #define LOG2OFe 1.4426950f - // t0 = Op * LOG2OFe + // t0 = Op * log2(e) // TODO: What fast-math-flags should be set here? SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, - getF32Constant(DAG, 0x3fb8aa3b, dl)); + DAG.getConstantFP(numbers::log2ef, dl, MVT::f32)); return getLimitedPrecisionExp2(t0, dl, DAG); } @@ -4941,10 +5011,11 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); - // Scale the exponent by log(2) [0.69314718f]. + // Scale the exponent by log(2). SDValue Exp = GetExponent(DAG, Op1, TLI, dl); - SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, - getF32Constant(DAG, 0x3f317218, dl)); + SDValue LogOfExponent = + DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, + DAG.getConstantFP(numbers::ln2f, dl, MVT::f32)); // Get the significand and build it into a floating-point number with // exponent of 1. @@ -5311,19 +5382,32 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } -// getUnderlyingArgReg - Find underlying register used for a truncated or -// bitcasted argument. -static unsigned getUnderlyingArgReg(const SDValue &N) { +// getUnderlyingArgRegs - Find underlying registers used for a truncated, +// bitcasted, or split argument. 
Returns a list of <Register, size in bits> +static void +getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs, + const SDValue &N) { switch (N.getOpcode()) { - case ISD::CopyFromReg: - return cast<RegisterSDNode>(N.getOperand(1))->getReg(); + case ISD::CopyFromReg: { + SDValue Op = N.getOperand(1); + Regs.emplace_back(cast<RegisterSDNode>(Op)->getReg(), + Op.getValueType().getSizeInBits()); + return; + } case ISD::BITCAST: case ISD::AssertZext: case ISD::AssertSext: case ISD::TRUNCATE: - return getUnderlyingArgReg(N.getOperand(0)); + getUnderlyingArgRegs(Regs, N.getOperand(0)); + return; + case ISD::BUILD_PAIR: + case ISD::BUILD_VECTOR: + case ISD::CONCAT_VECTORS: + for (SDValue Op : N->op_values()) + getUnderlyingArgRegs(Regs, Op); + return; default: - return 0; + return; } } @@ -5412,11 +5496,16 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (FI != std::numeric_limits<int>::max()) Op = MachineOperand::CreateFI(FI); + SmallVector<std::pair<unsigned, unsigned>, 8> ArgRegsAndSizes; if (!Op && N.getNode()) { - unsigned Reg = getUnderlyingArgReg(N); - if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { + getUnderlyingArgRegs(ArgRegsAndSizes, N); + Register Reg; + if (ArgRegsAndSizes.size() == 1) + Reg = ArgRegsAndSizes.front().first; + + if (Reg && Reg.isVirtual()) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); - unsigned PR = RegInfo.getLiveInPhysReg(Reg); + Register PR = RegInfo.getLiveInPhysReg(Reg); if (PR) Reg = PR; } @@ -5436,29 +5525,42 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } if (!Op) { + // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg + auto splitMultiRegDbgValue + = [&](ArrayRef<std::pair<unsigned, unsigned>> SplitRegs) { + unsigned Offset = 0; + for (auto RegAndSize : SplitRegs) { + auto FragmentExpr = DIExpression::createFragmentExpression( + Expr, Offset, RegAndSize.second); + if (!FragmentExpr) + continue; + assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?"); + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, + RegAndSize.first, Variable, *FragmentExpr)); + Offset += RegAndSize.second; + } + }; + // Check if ValueMap has reg number. - DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); + DenseMap<const Value *, unsigned>::const_iterator + VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) { const auto &TLI = DAG.getTargetLoweringInfo(); RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second, V->getType(), getABIRegCopyCC(V)); if (RFV.occupiesMultipleRegs()) { - unsigned Offset = 0; - for (auto RegAndSize : RFV.getRegsAndSizes()) { - Op = MachineOperand::CreateReg(RegAndSize.first, false); - auto FragmentExpr = DIExpression::createFragmentExpression( - Expr, Offset, RegAndSize.second); - if (!FragmentExpr) - continue; - FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare, - Op->getReg(), Variable, *FragmentExpr)); - Offset += RegAndSize.second; - } + splitMultiRegDbgValue(RFV.getRegsAndSizes()); return true; } + Op = MachineOperand::CreateReg(VMI->second, false); IsIndirect = IsDbgDeclare; + } else if (ArgRegsAndSizes.size() > 1) { + // This was split due to the calling convention, and no virtual register + // mapping exists for the value. 
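// --- Sketch (editor's illustration, not part of this commit): what the
// splitMultiRegDbgValue lambda above does. A variable split across several
// registers gets one DBG_VALUE per register, each carrying a DIExpression
// fragment at a running bit offset (an i128 in two 64-bit registers becomes
// fragments [0,64) and [64,128)). Illustrative stand-in types:
#include <cstdio>
struct RegAndSize { unsigned Reg; unsigned Bits; };
static void emitFragments(const RegAndSize *Regs, unsigned N) {
  unsigned Offset = 0;
  for (unsigned I = 0; I != N; ++I) {
    std::printf("DBG_VALUE vreg%u, fragment(%u, %u)\n", Regs[I].Reg, Offset,
                Regs[I].Bits); // one fragment per sub-register
    Offset += Regs[I].Bits;    // running offset in bits
  }
}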
+ splitMultiRegDbgValue(ArgRegsAndSizes); + return true; } } @@ -5468,8 +5570,10 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( assert(Variable->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); IsIndirect = (Op->isReg()) ? IsIndirect : true; + if (IsIndirect) + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, *Op, Variable, Expr)); return true; @@ -5554,11 +5658,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; case Intrinsic::sponentry: setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl, - TLI.getPointerTy(DAG.getDataLayout()))); + TLI.getFrameIndexTy(DAG.getDataLayout()))); return; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, - TLI.getPointerTy(DAG.getDataLayout()), + TLI.getFrameIndexTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return; case Intrinsic::read_register: { @@ -5888,65 +5992,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::masked_compressstore: visitMaskedStore(I, true /* IsCompressing */); return; - case Intrinsic::x86_mmx_pslli_w: - case Intrinsic::x86_mmx_pslli_d: - case Intrinsic::x86_mmx_pslli_q: - case Intrinsic::x86_mmx_psrli_w: - case Intrinsic::x86_mmx_psrli_d: - case Intrinsic::x86_mmx_psrli_q: - case Intrinsic::x86_mmx_psrai_w: - case Intrinsic::x86_mmx_psrai_d: { - SDValue ShAmt = getValue(I.getArgOperand(1)); - if (isa<ConstantSDNode>(ShAmt)) { - visitTargetIntrinsic(I, Intrinsic); - return; - } - unsigned NewIntrinsic = 0; - EVT ShAmtVT = MVT::v2i32; - switch (Intrinsic) { - case Intrinsic::x86_mmx_pslli_w: - NewIntrinsic = Intrinsic::x86_mmx_psll_w; - break; - case Intrinsic::x86_mmx_pslli_d: - NewIntrinsic = Intrinsic::x86_mmx_psll_d; - break; - case Intrinsic::x86_mmx_pslli_q: - NewIntrinsic = Intrinsic::x86_mmx_psll_q; - break; - case Intrinsic::x86_mmx_psrli_w: - NewIntrinsic = Intrinsic::x86_mmx_psrl_w; - break; - case Intrinsic::x86_mmx_psrli_d: - NewIntrinsic = Intrinsic::x86_mmx_psrl_d; - break; - case Intrinsic::x86_mmx_psrli_q: - NewIntrinsic = Intrinsic::x86_mmx_psrl_q; - break; - case Intrinsic::x86_mmx_psrai_w: - NewIntrinsic = Intrinsic::x86_mmx_psra_w; - break; - case Intrinsic::x86_mmx_psrai_d: - NewIntrinsic = Intrinsic::x86_mmx_psra_d; - break; - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. - } - - // The vector shift intrinsics with scalars uses 32b shift amounts but - // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits - // to be zero. - // We must do this early because v2i32 is not a legal type. 
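// --- Sketch (editor's illustration, not part of this commit): the identity
// behind expandExp's rewrite above. exp(x) = 2^(x * log2(e)), so the
// limited-precision path only needs an exp2 polynomial once the input is
// scaled, which is why the opaque 0x3fb8aa3b immediate became the named
// constant numbers::log2ef:
#include <cmath>
#include <cstdio>
int main() {
  const float Log2e = 1.44269504f; // numbers::log2ef
  float X = 0.7f;
  std::printf("%f %f\n", std::exp(X), std::exp2(X * Log2e)); // ~equal
}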
- SDValue ShOps[2]; - ShOps[0] = ShAmt; - ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); - ShAmt = DAG.getBuildVector(ShAmtVT, sdl, ShOps); - EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); - Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, - DAG.getConstant(NewIntrinsic, sdl, MVT::i32), - getValue(I.getArgOperand(0)), ShAmt); - setValue(&I, Res); - return; - } case Intrinsic::powi: setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); @@ -6063,6 +6108,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: case Intrinsic::experimental_constrained_fptrunc: case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: @@ -6075,12 +6122,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::experimental_constrained_log: case Intrinsic::experimental_constrained_log10: case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_lrint: + case Intrinsic::experimental_constrained_llrint: case Intrinsic::experimental_constrained_rint: case Intrinsic::experimental_constrained_nearbyint: case Intrinsic::experimental_constrained_maxnum: case Intrinsic::experimental_constrained_minnum: case Intrinsic::experimental_constrained_ceil: case Intrinsic::experimental_constrained_floor: + case Intrinsic::experimental_constrained_lround: + case Intrinsic::experimental_constrained_llround: case Intrinsic::experimental_constrained_round: case Intrinsic::experimental_constrained_trunc: visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I)); @@ -6272,6 +6323,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Op3)); return; } + case Intrinsic::umul_fix_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + setValue(&I, DAG.getNode(ISD::UMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2, + Op3)); + return; + } case Intrinsic::stacksave: { SDValue Op = getRoot(); Res = DAG.getNode( @@ -6347,29 +6406,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DAG.setRoot(Res); return; } - case Intrinsic::objectsize: { - // If we don't know by now, we're never going to know. - ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1)); - - assert(CI && "Non-constant type in __builtin_object_size?"); - - SDValue Arg = getValue(I.getCalledValue()); - EVT Ty = Arg.getValueType(); - - if (CI->isZero()) - Res = DAG.getConstant(-1ULL, sdl, Ty); - else - Res = DAG.getConstant(0, sdl, Ty); - - setValue(&I, Res); - return; - } + case Intrinsic::objectsize: + llvm_unreachable("llvm.objectsize.* should have been lowered already"); case Intrinsic::is_constant: - // If this wasn't constant-folded away by now, then it's not a - // constant. 
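// --- Sketch (editor's illustration, not part of this commit): semantics of
// the new ISD::UMULFIXSAT node wired up above. llvm.umul.fix.sat multiplies
// two unsigned fixed-point values with Scale fractional bits and saturates
// instead of wrapping. Widened reference implementation for i32 (Scale up
// to 32), names illustrative:
#include <cstdint>
static uint32_t umulFixSat32(uint32_t A, uint32_t B, unsigned Scale) {
  uint64_t Prod = ((uint64_t)A * B) >> Scale;             // exact in 64 bits
  return Prod > UINT32_MAX ? UINT32_MAX : (uint32_t)Prod; // saturate
}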
- setValue(&I, DAG.getConstant(0, sdl, MVT::i1)); - return; + llvm_unreachable("llvm.is.constant.* should have been lowered already"); case Intrinsic::annotation: case Intrinsic::ptr_annotation: @@ -6818,6 +6859,17 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, Val); return; } + case Intrinsic::ptrmask: { + SDValue Ptr = getValue(I.getOperand(0)); + SDValue Const = getValue(I.getOperand(1)); + + EVT DestVT = + EVT(DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); + + setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), DestVT, Ptr, + DAG.getZExtOrTrunc(Const, getCurSDLoc(), DestVT))); + return; + } } } @@ -6845,6 +6897,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_fma: Opcode = ISD::STRICT_FMA; break; + case Intrinsic::experimental_constrained_fptosi: + Opcode = ISD::STRICT_FP_TO_SINT; + break; + case Intrinsic::experimental_constrained_fptoui: + Opcode = ISD::STRICT_FP_TO_UINT; + break; case Intrinsic::experimental_constrained_fptrunc: Opcode = ISD::STRICT_FP_ROUND; break; @@ -6881,6 +6939,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_log2: Opcode = ISD::STRICT_FLOG2; break; + case Intrinsic::experimental_constrained_lrint: + Opcode = ISD::STRICT_LRINT; + break; + case Intrinsic::experimental_constrained_llrint: + Opcode = ISD::STRICT_LLRINT; + break; case Intrinsic::experimental_constrained_rint: Opcode = ISD::STRICT_FRINT; break; @@ -6899,6 +6963,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_floor: Opcode = ISD::STRICT_FFLOOR; break; + case Intrinsic::experimental_constrained_lround: + Opcode = ISD::STRICT_LROUND; + break; + case Intrinsic::experimental_constrained_llround: + Opcode = ISD::STRICT_LLROUND; + break; case Intrinsic::experimental_constrained_round: Opcode = ISD::STRICT_FROUND; break; @@ -7102,7 +7172,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, if (SwiftErrorVal && TLI.supportSwiftError()) { // Get the last element of InVals. SDValue Src = CLI.InVals.back(); - unsigned VReg = SwiftError.getOrCreateVRegDefAt( + Register VReg = SwiftError.getOrCreateVRegDefAt( CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal); SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); DAG.setRoot(CopyNode); @@ -8021,6 +8091,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Compute the constraint code and ConstraintType to use. TLI.ComputeConstraintToUse(T, SDValue()); + if (T.ConstraintType == TargetLowering::C_Immediate && + OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand)) + // We've delayed emitting a diagnostic like the "n" constraint because + // inlining could cause an integer showing up. 
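// --- Sketch (editor's illustration, not part of this commit): the operand
// class the new C_Immediate handling guards. An immediate constraint such
// as "n" must fold to an integer constant by selection time; a runtime
// value now gets the diagnostic below instead of a crash. GNU inline asm,
// x86-specific, purely illustrative:
static int rotl8(int X) {
  asm("roll %1, %0" : "+r"(X) : "n"(8)); // fine: 8 is a constant expression
  return X;                              // "n"(SomeRuntimeInt) would error
}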
+ return emitInlineAsmError( + CS, "constraint '" + Twine(T.ConstraintCode) + "' expects an " + "integer constant expression"); + ExtraInfo.update(T); } @@ -8105,7 +8183,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { switch (OpInfo.Type) { case InlineAsm::isOutput: if (OpInfo.ConstraintType == TargetLowering::C_Memory || - (OpInfo.ConstraintType == TargetLowering::C_Other && + ((OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) && OpInfo.isIndirect)) { unsigned ConstraintID = TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); @@ -8119,13 +8198,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { MVT::i32)); AsmNodeOperands.push_back(OpInfo.CallOperand); break; - } else if ((OpInfo.ConstraintType == TargetLowering::C_Other && + } else if (((OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) && !OpInfo.isIndirect) || OpInfo.ConstraintType == TargetLowering::C_Register || OpInfo.ConstraintType == TargetLowering::C_RegisterClass) { // Otherwise, this outputs to a register (directly for C_Register / - // C_RegisterClass, and a target-defined fashion for C_Other). Find a - // register that we can use. + // C_RegisterClass, and a target-defined fashion for + // C_Immediate/C_Other). Find a register that we can use. if (OpInfo.AssignedRegs.Regs.empty()) { emitInlineAsmError( CS, "couldn't allocate output register for constraint '" + @@ -8205,15 +8285,24 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Treat indirect 'X' constraint as memory. - if (OpInfo.ConstraintType == TargetLowering::C_Other && + if ((OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) && OpInfo.isIndirect) OpInfo.ConstraintType = TargetLowering::C_Memory; - if (OpInfo.ConstraintType == TargetLowering::C_Other) { + if (OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector<SDValue> Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); if (Ops.empty()) { + if (OpInfo.ConstraintType == TargetLowering::C_Immediate) + if (isa<ConstantSDNode>(InOperandVal)) { + emitInlineAsmError(CS, "value out of range for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + return; + } + emitInlineAsmError(CS, "invalid operand for inline asm constraint '" + Twine(OpInfo.ConstraintCode) + "'"); return; @@ -8250,7 +8339,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || - OpInfo.ConstraintType == TargetLowering::C_Register) && + OpInfo.ConstraintType == TargetLowering::C_Register || + OpInfo.ConstraintType == TargetLowering::C_Immediate) && "Unknown constraint type!"); // TODO: Support this. @@ -8356,6 +8446,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { Val = OpInfo.AssignedRegs.getCopyFromRegs( DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); break; + case TargetLowering::C_Immediate: case TargetLowering::C_Other: Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(), OpInfo, DAG); @@ -9018,7 +9109,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // Certain targets (such as MIPS), may have a different ABI alignment // for a type depending on the context. Give the target a chance to // specify the alignment it wants. 
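// --- Sketch (editor's illustration, not part of this commit): the shape of
// the unsigned -> llvm::Align migration visible in the surrounding hunks.
// Align wraps a power-of-two byte count, so "alignment 0" ceases to exist
// and Align::None() spells the minimal one-byte alignment (hence
// setOrigAlign(1) becoming setOrigAlign(Align::None())). Minimal stand-in;
// the real class lives in llvm/Support/Alignment.h:
struct AlignStandIn {
  unsigned ShiftValue;                  // stores log2 of the alignment
  explicit AlignStandIn(unsigned V)     // V must be a nonzero power of two
      : ShiftValue(__builtin_ctz(V)) {}
  unsigned value() const { return 1u << ShiftValue; }
};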
- unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL); + const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL)); if (Args[i].Ty->isPointerTy()) { Flags.setPointer(); @@ -9073,7 +9164,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { FrameAlign = Args[i].Alignment; else FrameAlign = getByValTypeAlignment(ElementTy, DL); - Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Args[i].IsNest) Flags.setNest(); @@ -9129,7 +9220,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) { - MyFlags.Flags.setOrigAlign(1); + MyFlags.Flags.setOrigAlign(Align::None()); if (j == NumParts - 1) MyFlags.Flags.setSplitEnd(); } @@ -9259,7 +9350,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { assert((Op.getOpcode() != ISD::CopyFromReg || cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) && "Copy from a reg to the same reg!"); - assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); + assert(!Register::isPhysicalRegister(Reg) && "Is a physreg"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // If this is an InlineAsm we have to match the registers required, not the @@ -9516,8 +9607,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Certain targets (such as MIPS), may have a different ABI alignment // for a type depending on the context. Give the target a chance to // specify the alignment it wants. - unsigned OriginalAlignment = - TLI->getABIAlignmentForCallingConv(ArgTy, DL); + const Align OriginalAlignment( + TLI->getABIAlignmentForCallingConv(ArgTy, DL)); if (Arg.getType()->isPointerTy()) { Flags.setPointer(); @@ -9577,7 +9668,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { FrameAlign = Arg.getParamAlignment(); else FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); - Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Arg.hasAttribute(Attribute::Nest)) Flags.setNest(); @@ -9586,6 +9677,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setOrigAlign(OriginalAlignment); if (ArgCopyElisionCandidates.count(&Arg)) Flags.setCopyElisionCandidate(); + if (Arg.hasAttribute(Attribute::Returned)) + Flags.setReturned(); MVT RegisterVT = TLI->getRegisterTypeForCallingConv( *CurDAG->getContext(), F.getCallingConv(), VT); @@ -9598,7 +9691,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 else if (i > 0) { - MyFlags.Flags.setOrigAlign(1); + MyFlags.Flags.setOrigAlign(Align::None()); if (i == NumRegs - 1) MyFlags.Flags.setSplitEnd(); } @@ -9650,7 +9743,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); - unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); + Register SRetReg = + RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); FuncInfo->DemoteRegister = SRetReg; NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue); @@ -9748,10 +9842,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) { FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); } + // Analyses past this point are naive and don't expect an assertion. + if (Res.getOpcode() == ISD::AssertZext) + Res = Res.getOperand(0); + // Update the SwiftErrorVRegDefMap. 
if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) { unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(), Reg); } @@ -9763,7 +9861,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // FIXME: This isn't very clean... it would be nice to make this more // general. unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { FuncInfo->ValueMap[&Arg] = Reg; continue; } @@ -10087,8 +10185,6 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, break; } case CC_BitTests: { - // FIXME: If Fallthrough is unreachable, skip the range check. - // FIXME: Optimize away range check based on pivot comparisons. BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex]; @@ -10109,6 +10205,11 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, BTB->DefaultProb -= DefaultProb / 2; } + if (FallthroughUnreachable) { + // Skip the range check if the fallthrough block is unreachable. + BTB->OmitRangeCheck = true; + } + // If we're in the right place, emit the bit test header right now. if (CurMBB == SwitchMBB) { visitBitTestHeader(*BTB, SwitchMBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 0072e33f23b7..bfcf30b430b6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -426,7 +426,7 @@ public: SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, SwiftErrorValueTracking &swifterror, CodeGenOpt::Level ol) : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag), - SL(make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo), + SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo), SwiftError(swifterror) {} void init(GCFunctionInfo *gfi, AliasAnalysis *AA, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index da3049881d31..bc10f7621239 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -280,6 +280,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::SPLAT_VECTOR: return "splat_vector"; case ISD::CARRY_FALSE: return "carry_false"; case ISD::ADDC: return "addc"; case ISD::ADDE: return "adde"; @@ -305,6 +306,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SMULFIX: return "smulfix"; case ISD::SMULFIXSAT: return "smulfixsat"; case ISD::UMULFIX: return "umulfix"; + case ISD::UMULFIXSAT: return "umulfixsat"; // Conversion operators. 
case ISD::SIGN_EXTEND: return "sign_extend"; @@ -318,22 +320,27 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FP_ROUND: return "fp_round"; case ISD::STRICT_FP_ROUND: return "strict_fp_round"; case ISD::FLT_ROUNDS_: return "flt_rounds"; - case ISD::FP_ROUND_INREG: return "fp_round_inreg"; case ISD::FP_EXTEND: return "fp_extend"; case ISD::STRICT_FP_EXTEND: return "strict_fp_extend"; case ISD::SINT_TO_FP: return "sint_to_fp"; case ISD::UINT_TO_FP: return "uint_to_fp"; case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::STRICT_FP_TO_UINT: return "strict_fp_to_uint"; case ISD::BITCAST: return "bitcast"; case ISD::ADDRSPACECAST: return "addrspacecast"; case ISD::FP16_TO_FP: return "fp16_to_fp"; case ISD::FP_TO_FP16: return "fp_to_fp16"; case ISD::LROUND: return "lround"; + case ISD::STRICT_LROUND: return "strict_lround"; case ISD::LLROUND: return "llround"; + case ISD::STRICT_LLROUND: return "strict_llround"; case ISD::LRINT: return "lrint"; + case ISD::STRICT_LRINT: return "strict_lrint"; case ISD::LLRINT: return "llrint"; + case ISD::STRICT_LLRINT: return "strict_llrint"; // Control flow instructions case ISD::BR: return "br"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index bdf9f2c166e1..1f07a241a824 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -434,9 +435,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TII = MF->getSubtarget().getInstrInfo(); TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); - LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn); GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; - ORE = make_unique<OptimizationRemarkEmitter>(&Fn); + ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn); auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); @@ -524,8 +525,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { To = J->second; } // Make sure the new register has a sufficiently constrained register class. - if (TargetRegisterInfo::isVirtualRegister(From) && - TargetRegisterInfo::isVirtualRegister(To)) + if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To)) MRI.constrainRegClass(To, MRI.getRegClass(From)); // Replace it. @@ -572,7 +572,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { bool hasFI = MI->getOperand(0).isFI(); Register Reg = hasFI ? 
TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) EntryMBB->insert(EntryMBB->begin(), MI); else { MachineInstr *Def = RegInfo->getVRegDef(Reg); @@ -582,7 +582,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { Def->getParent()->insert(std::next(InsertPos), MI); } else LLVM_DEBUG(dbgs() << "Dropping debug info for dead vreg" - << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); + << Register::virtReg2Index(Reg) << "\n"); } // If Reg is live-in then update debug info to track its copy in a vreg. @@ -671,8 +671,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { To = J->second; } // Make sure the new register has a sufficiently constrained register class. - if (TargetRegisterInfo::isVirtualRegister(From) && - TargetRegisterInfo::isVirtualRegister(To)) + if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To)) MRI.constrainRegClass(To, MRI.getRegClass(From)); // Replace it. @@ -760,7 +759,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { continue; unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DestReg)) + if (!Register::isVirtualRegister(DestReg)) continue; // Ignore non-integer values. @@ -1652,9 +1651,8 @@ static bool MIIsInTerminatorSequence(const MachineInstr &MI) { // Make sure that the copy dest is not a vreg when the copy source is a // physical register. - if (!OPI2->isReg() || - (!TargetRegisterInfo::isPhysicalRegister(OPI->getReg()) && - TargetRegisterInfo::isPhysicalRegister(OPI2->getReg()))) + if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) && + Register::isPhysicalRegister(OPI2->getReg()))) return false; return true; @@ -2234,9 +2232,9 @@ void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { SDLoc dl(Op); MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); - unsigned Reg = + Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0), - *CurDAG); + CurDAG->getMachineFunction()); SDValue New = CurDAG->getCopyFromReg( Op->getOperand(0), dl, Reg, Op->getValueType(0)); New->setNodeId(-1); @@ -2248,9 +2246,9 @@ void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) { SDLoc dl(Op); MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); - unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(), + Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Op->getOperand(2).getValueType(), - *CurDAG); + CurDAG->getMachineFunction()); SDValue New = CurDAG->getCopyToReg( Op->getOperand(0), dl, Reg, Op->getOperand(2)); New->setNodeId(-1); @@ -3323,10 +3321,13 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, continue; } - case OPC_EmitCopyToReg: { + case OPC_EmitCopyToReg: + case OPC_EmitCopyToReg2: { unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg"); unsigned DestPhysReg = MatcherTable[MatcherIndex++]; + if (Opcode == OPC_EmitCopyToReg2) + DestPhysReg |= MatcherTable[MatcherIndex++] << 8; if (!InputChain.getNode()) InputChain = CurDAG->getEntryNode(); diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 395e9a8a4fc5..fad98b6f50dc 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ 
b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -378,7 +378,6 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, // We use TargetFrameIndex so that isel will not select it into LEA Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy()); -#ifndef NDEBUG // Right now we always allocate spill slots that are of the same // size as the value we're about to spill (the size of spillee can // vary since we spill vectors of pointers too). At some point we @@ -387,12 +386,18 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo(); assert((MFI.getObjectSize(Index) * 8) == Incoming.getValueSizeInBits() && "Bad spill: stack slot does not match!"); -#endif + // Note: Using the alignment of the spill slot (rather than the abi or + // preferred alignment) is required for correctness when dealing with spill + // slots with preferred alignments larger than frame alignment.. auto &MF = Builder.DAG.getMachineFunction(); auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index); + auto *StoreMMO = + MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, + MFI.getObjectSize(Index), + MFI.getObjectAlignment(Index)); Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc, - PtrInfo); + StoreMMO); MMO = getMachineMemOperand(MF, *cast<FrameIndexSDNode>(Loc)); @@ -1011,20 +1016,27 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { return; } - SDValue SpillSlot = - DAG.getTargetFrameIndex(*DerivedPtrLocation, getFrameIndexTy()); + unsigned Index = *DerivedPtrLocation; + SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy()); // Note: We know all of these reloads are independent, but don't bother to // exploit that chain wise. DAGCombine will happily do so as needed, so // doing it here would be a small compile time win at most. SDValue Chain = getRoot(); - SDValue SpillLoad = - DAG.getLoad(DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), - Relocate.getType()), - getCurSDLoc(), Chain, SpillSlot, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), - *DerivedPtrLocation)); + auto &MF = DAG.getMachineFunction(); + auto &MFI = MF.getFrameInfo(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index); + auto *LoadMMO = + MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, + MFI.getObjectSize(Index), + MFI.getObjectAlignment(Index)); + + auto LoadVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + Relocate.getType()); + + SDValue SpillLoad = DAG.getLoad(LoadVT, getCurSDLoc(), Chain, + SpillSlot, LoadMMO); DAG.setRoot(SpillLoad.getValue(1)); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index b260cd91d468..9ab1324533f1 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/TargetLowering.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -37,7 +36,7 @@ using namespace llvm; /// NOTE: The TargetMachine owns TLOF. 
TargetLowering::TargetLowering(const TargetMachine &tm) - : TargetLoweringBase(tm) {} + : TargetLoweringBase(tm) {} const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; @@ -80,7 +79,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, const CCValAssign &ArgLoc = ArgLocs[I]; if (!ArgLoc.isRegLoc()) continue; - unsigned Reg = ArgLoc.getLocReg(); + Register Reg = ArgLoc.getLocReg(); // Only look at callee saved registers. if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg)) continue; @@ -121,19 +120,25 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call, /// result of type RetVT. std::pair<SDValue, SDValue> TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, - ArrayRef<SDValue> Ops, bool isSigned, - const SDLoc &dl, bool doesNotReturn, - bool isReturnValueUsed, - bool isPostTypeLegalization) const { + ArrayRef<SDValue> Ops, + MakeLibCallOptions CallOptions, + const SDLoc &dl) const { TargetLowering::ArgListTy Args; Args.reserve(Ops.size()); TargetLowering::ArgListEntry Entry; - for (SDValue Op : Ops) { - Entry.Node = Op; + for (unsigned i = 0; i < Ops.size(); ++i) { + SDValue NewOp = Ops[i]; + Entry.Node = NewOp; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); - Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); + Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(), + CallOptions.IsSExt); + Entry.IsZExt = !Entry.IsSExt; + + if (CallOptions.IsSoften && + !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) { + Entry.IsSExt = Entry.IsZExt = false; + } Args.push_back(Entry); } @@ -144,15 +149,22 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); - bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned); + bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt); + bool zeroExtend = !signExtend; + + if (CallOptions.IsSoften && + !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) { + signExtend = zeroExtend = false; + } + CLI.setDebugLoc(dl) .setChain(DAG.getEntryNode()) .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) - .setNoReturn(doesNotReturn) - .setDiscardResult(!isReturnValueUsed) - .setIsPostTypeLegalization(isPostTypeLegalization) + .setNoReturn(CallOptions.DoesNotReturn) + .setDiscardResult(!CallOptions.IsReturnValueUsed) + .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization) .setSExtResult(signExtend) - .setZExtResult(!signExtend); + .setZExtResult(zeroExtend); return LowerCallTo(CLI); } @@ -263,7 +275,8 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps, void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - const SDLoc &dl) const { + const SDLoc &dl, const SDValue OldLHS, + const SDValue OldRHS) const { assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128) && "Unsupported setcc type!"); @@ -365,8 +378,11 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, // Use the target specific return value for comparions lib calls. 
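// --- Sketch (editor's illustration, not part of this commit): the refactor
// pattern applied to makeLibCall above -- a growing list of positional bool
// parameters is replaced by a MakeLibCallOptions struct with chained
// setters, so call sites name what they set. Stand-alone analogue,
// illustrative names:
struct CallOpts {
  bool IsSExt = false, DoesNotReturn = false, DiscardResult = false;
  CallOpts &setSExt(bool V = true) { IsSExt = V; return *this; }
  CallOpts &setNoReturn(bool V = true) { DoesNotReturn = V; return *this; }
  CallOpts &setDiscardResult(bool V = true) { DiscardResult = V; return *this; }
};
// A call site then reads: makeCall(..., CallOpts().setSExt().setNoReturn());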
EVT RetVT = getCmpLibcallReturnType(); SDValue Ops[2] = {NewLHS, NewRHS}; - NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/, - dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { OldLHS.getValueType(), + OldRHS.getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true); + NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first; NewRHS = DAG.getConstant(0, dl, RetVT); CCCode = getCmpLibcallCC(LC1); @@ -378,8 +394,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, ISD::SETCC, dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), NewLHS, NewRHS, DAG.getCondCode(CCCode)); - NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false/*sign irrelevant*/, - dl).first; + NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first; NewLHS = DAG.getNode( ISD::SETCC, dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), @@ -564,6 +579,170 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, AssumeSingleUse); } +// TODO: Can we merge SelectionDAG::GetDemandedBits into this? +// TODO: Under what circumstances can we create nodes? Constant folding? +SDValue TargetLowering::SimplifyMultipleUseDemandedBits( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + SelectionDAG &DAG, unsigned Depth) const { + // Limit search depth. + if (Depth >= SelectionDAG::MaxRecursionDepth) + return SDValue(); + + // Ignore UNDEFs. + if (Op.isUndef()) + return SDValue(); + + // Not demanding any bits/elts from Op. + if (DemandedBits == 0 || DemandedElts == 0) + return DAG.getUNDEF(Op.getValueType()); + + unsigned NumElts = DemandedElts.getBitWidth(); + KnownBits LHSKnown, RHSKnown; + switch (Op.getOpcode()) { + case ISD::BITCAST: { + SDValue Src = peekThroughBitcasts(Op.getOperand(0)); + EVT SrcVT = Src.getValueType(); + EVT DstVT = Op.getValueType(); + unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits(); + unsigned NumDstEltBits = DstVT.getScalarSizeInBits(); + + if (NumSrcEltBits == NumDstEltBits) + if (SDValue V = SimplifyMultipleUseDemandedBits( + Src, DemandedBits, DemandedElts, DAG, Depth + 1)) + return DAG.getBitcast(DstVT, V); + + // TODO - bigendian once we have test coverage. + if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 && + DAG.getDataLayout().isLittleEndian()) { + unsigned Scale = NumDstEltBits / NumSrcEltBits; + unsigned NumSrcElts = SrcVT.getVectorNumElements(); + APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + for (unsigned i = 0; i != Scale; ++i) { + unsigned Offset = i * NumSrcEltBits; + APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); + if (!Sub.isNullValue()) { + DemandedSrcBits |= Sub; + for (unsigned j = 0; j != NumElts; ++j) + if (DemandedElts[j]) + DemandedSrcElts.setBit((j * Scale) + i); + } + } + + if (SDValue V = SimplifyMultipleUseDemandedBits( + Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1)) + return DAG.getBitcast(DstVT, V); + } + + // TODO - bigendian once we have test coverage. + if ((NumSrcEltBits % NumDstEltBits) == 0 && + DAG.getDataLayout().isLittleEndian()) { + unsigned Scale = NumSrcEltBits / NumDstEltBits; + unsigned NumSrcElts = SrcVT.isVector() ? 
SrcVT.getVectorNumElements() : 1; + APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) { + unsigned Offset = (i % Scale) * NumDstEltBits; + DemandedSrcBits.insertBits(DemandedBits, Offset); + DemandedSrcElts.setBit(i / Scale); + } + + if (SDValue V = SimplifyMultipleUseDemandedBits( + Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1)) + return DAG.getBitcast(DstVT, V); + } + + break; + } + case ISD::AND: { + LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + + // If all of the demanded bits are known 1 on one side, return the other. + // These bits cannot contribute to the result of the 'and' in this + // context. + if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One)) + return Op.getOperand(0); + if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One)) + return Op.getOperand(1); + break; + } + case ISD::OR: { + LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + + // If all of the demanded bits are known zero on one side, return the + // other. These bits cannot contribute to the result of the 'or' in this + // context. + if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero)) + return Op.getOperand(0); + if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero)) + return Op.getOperand(1); + break; + } + case ISD::XOR: { + LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + + // If all of the demanded bits are known zero on one side, return the + // other. + if (DemandedBits.isSubsetOf(RHSKnown.Zero)) + return Op.getOperand(0); + if (DemandedBits.isSubsetOf(LHSKnown.Zero)) + return Op.getOperand(1); + break; + } + case ISD::SIGN_EXTEND_INREG: { + // If none of the extended bits are demanded, eliminate the sextinreg. + EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits()) + return Op.getOperand(0); + break; + } + case ISD::INSERT_VECTOR_ELT: { + // If we don't demand the inserted element, return the base vector. + SDValue Vec = Op.getOperand(0); + auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + EVT VecVT = Vec.getValueType(); + if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) && + !DemandedElts[CIdx->getZExtValue()]) + return Vec; + break; + } + case ISD::VECTOR_SHUFFLE: { + ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask(); + + // If all the demanded elts are from one operand and are inline, + // then we can use the operand directly. + bool AllUndef = true, IdentityLHS = true, IdentityRHS = true; + for (unsigned i = 0; i != NumElts; ++i) { + int M = ShuffleMask[i]; + if (M < 0 || !DemandedElts[i]) + continue; + AllUndef = false; + IdentityLHS &= (M == (int)i); + IdentityRHS &= ((M - NumElts) == i); + } + + if (AllUndef) + return DAG.getUNDEF(Op.getValueType()); + if (IdentityLHS) + return Op.getOperand(0); + if (IdentityRHS) + return Op.getOperand(1); + break; + } + default: + if (Op.getOpcode() >= ISD::BUILTIN_OP_END) + if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode( + Op, DemandedBits, DemandedElts, DAG, Depth)) + return V; + break; + } + return SDValue(); +} + /// Look at Op. 
At this point, we know that only the OriginalDemandedBits of the /// result of Op are ever used downstream. If we can use this information to /// simplify Op, create a new simplified DAG node and return true, returning the @@ -619,12 +798,15 @@ bool TargetLowering::SimplifyDemandedBits( } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) { // Not demanding any bits/elts from Op. return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); - } else if (Depth == 6) { // Limit search depth. + } else if (Depth >= SelectionDAG::MaxRecursionDepth) { + // Limit search depth. return false; } KnownBits Known2, KnownOut; switch (Op.getOpcode()) { + case ISD::TargetConstant: + llvm_unreachable("Can't simplify this node"); case ISD::SCALAR_TO_VECTOR: { if (!DemandedElts[0]) return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); @@ -728,6 +910,21 @@ bool TargetLowering::SimplifyDemandedBits( } break; } + case ISD::EXTRACT_SUBVECTOR: { + // If index isn't constant, assume we need all the source vector elements. + SDValue Src = Op.getOperand(0); + ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt SrcElts = APInt::getAllOnesValue(NumSrcElts); + if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { + // Offset the demanded elts by the subvector index. + uint64_t Idx = SubIdx->getZExtValue(); + SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + } + if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1)) + return true; + break; + } case ISD::CONCAT_VECTORS: { Known.Zero.setAllBits(); Known.One.setAllBits(); @@ -773,22 +970,37 @@ bool TargetLowering::SimplifyDemandedBits( } if (!!DemandedLHS || !!DemandedRHS) { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + Known.Zero.setAllBits(); Known.One.setAllBits(); if (!!DemandedLHS) { - if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits, DemandedLHS, - Known2, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO, + Depth + 1)) return true; Known.One &= Known2.One; Known.Zero &= Known2.Zero; } if (!!DemandedRHS) { - if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedRHS, - Known2, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO, + Depth + 1)) return true; Known.One &= Known2.One; Known.Zero &= Known2.Zero; } + + // Attempt to avoid multi-use ops if we don't need anything from them. + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask); + return TLO.CombineTo(Op, NewOp); + } } break; } @@ -834,6 +1046,20 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? 
DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + // If all of the demanded bits are known one on one side, return the other. // These bits cannot contribute to the result of the 'and'. if (DemandedBits.isSubsetOf(Known2.Zero | Known.One)) @@ -869,6 +1095,20 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. if (DemandedBits.isSubsetOf(Known2.One | Known.Zero)) @@ -901,6 +1141,20 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. if (DemandedBits.isSubsetOf(Known.Zero)) @@ -1034,7 +1288,7 @@ bool TargetLowering::SimplifyDemandedBits( // out) are never demanded. // TODO - support non-uniform vector amounts. if (Op0.getOpcode() == ISD::SRL) { - if ((DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { + if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) { if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) { if (SA2->getAPIntValue().ult(BitWidth)) { @@ -1141,7 +1395,8 @@ bool TargetLowering::SimplifyDemandedBits( if (Op0.getOpcode() == ISD::SHL) { if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) { - if ((DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) { + if (!DemandedBits.intersects( + APInt::getHighBitsSet(BitWidth, ShAmt))) { if (SA2->getAPIntValue().ult(BitWidth)) { unsigned C1 = SA2->getZExtValue(); unsigned Opc = ISD::SRL; @@ -1479,6 +1734,11 @@ bool TargetLowering::SimplifyDemandedBits( return true; Known = Known.trunc(BitWidth); + // Attempt to avoid multi-use ops if we don't need anything from them. 
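// (Illustrative, not part of the patch: if only the low 8 bits of this
// truncate's source are demanded and the source is (or X, 0xFF00), the
// 'or' cannot affect those bits, so X can stand in for it in this one
// use even though the 'or' keeps its other users.)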
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( + Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1)) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc)); + // If the input is only used by this truncate, see if we can shrink it based // on the known demanded bits. if (Src.getNode()->hasOneUse()) { @@ -1595,9 +1855,7 @@ bool TargetLowering::SimplifyDemandedBits( // Bitcast from a vector using SimplifyDemanded Bits/VectorElts. // Demand the elt/bit if any of the original elts/bits are demanded. // TODO - bigendian once we have test coverage. - // TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support. - if (SrcVT.isVector() && NumSrcEltBits > 1 && - (BitWidth % NumSrcEltBits) == 0 && + if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 && TLO.DAG.getDataLayout().isLittleEndian()) { unsigned Scale = BitWidth / NumSrcEltBits; unsigned NumSrcElts = SrcVT.getVectorNumElements(); @@ -1663,6 +1921,7 @@ bool TargetLowering::SimplifyDemandedBits( // Add, Sub, and Mul don't demand any bits in positions beyond that // of the highest bit demanded of them. SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1); + SDNodeFlags Flags = Op.getNode()->getFlags(); unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros(); APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ); if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO, @@ -1671,7 +1930,6 @@ bool TargetLowering::SimplifyDemandedBits( Depth + 1) || // See if the operation should be performed at a smaller bit width. ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) { - SDNodeFlags Flags = Op.getNode()->getFlags(); if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) { // Disable the nsw and nuw flags. We can no longer guarantee that we // won't wrap after simplification. @@ -1684,6 +1942,23 @@ bool TargetLowering::SimplifyDemandedBits( return true; } + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Flags.setNoSignedWrap(false); + Flags.setNoUnsignedWrap(false); + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = + TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags); + return TLO.CombineTo(Op, NewOp); + } + } + // If we have a constant operand, we may be able to turn it into -1 if we // do not demand the high bits. This can make the constant smaller to // encode, allow more general folding, or match specialized instruction @@ -1694,10 +1969,8 @@ bool TargetLowering::SimplifyDemandedBits( if (C && !C->isAllOnesValue() && !C->isOne() && (C->getAPIntValue() | HighMask).isAllOnesValue()) { SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT); - // We can't guarantee that the new math op doesn't wrap, so explicitly - // clear those flags to prevent folding with a potential existing node - // that has those flags set. - SDNodeFlags Flags; + // Disable the nsw and nuw flags. We can no longer guarantee that we + // won't wrap after simplification. Flags.setNoSignedWrap(false); Flags.setNoUnsignedWrap(false); SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags); @@ -1837,7 +2110,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( } // Limit search depth. 
- if (Depth >= 6) + if (Depth >= SelectionDAG::MaxRecursionDepth) return false; SDLoc DL(Op); @@ -2001,6 +2274,15 @@ bool TargetLowering::SimplifyDemandedVectorElts( return true; APInt BaseElts = DemandedElts; BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx); + + // If none of the base operand elements are demanded, replace it with undef. + if (!BaseElts && !Base.isUndef()) + return TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, + TLO.DAG.getUNDEF(VT), + Op.getOperand(1), + Op.getOperand(2))); + if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO, Depth + 1)) return true; @@ -2134,11 +2416,13 @@ bool TargetLowering::SimplifyDemandedVectorElts( // Update legal shuffle masks based on demanded elements if it won't reduce // to Identity which can cause premature removal of the shuffle mask. - if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps && - isShuffleMaskLegal(NewMask, VT)) - return TLO.CombineTo(Op, - TLO.DAG.getVectorShuffle(VT, DL, Op.getOperand(0), - Op.getOperand(1), NewMask)); + if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) { + SDValue LegalShuffle = + buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1), + NewMask, TLO.DAG); + if (LegalShuffle) + return TLO.CombineTo(Op, LegalShuffle); + } // Propagate undef/zero elements from LHS/RHS. for (unsigned i = 0; i != NumElts; ++i) { @@ -2304,6 +2588,13 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known.resetAll(); } +void TargetLowering::computeKnownBitsForTargetInstr( + GISelKnownBits &Analysis, Register R, KnownBits &Known, + const APInt &DemandedElts, const MachineRegisterInfo &MRI, + unsigned Depth) const { + Known.resetAll(); +} + void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, @@ -2357,6 +2648,36 @@ bool TargetLowering::SimplifyDemandedBitsForTargetNode( return false; } +SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + SelectionDAG &DAG, unsigned Depth) const { + assert( + (Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op" + " is a target node!"); + return SDValue(); +} + +SDValue +TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, + SDValue N1, MutableArrayRef<int> Mask, + SelectionDAG &DAG) const { + bool LegalMask = isShuffleMaskLegal(Mask, VT); + if (!LegalMask) { + std::swap(N0, N1); + ShuffleVectorSDNode::commuteMask(Mask); + LegalMask = isShuffleMaskLegal(Mask, VT); + } + + if (!LegalMask) + return SDValue(); + + return DAG.getVectorShuffle(VT, DL, N0, N1, Mask); +} + const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const { return nullptr; } @@ -2610,6 +2931,77 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck( return T2; } +// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 +SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift( + EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL) const { + assert(isConstOrConstSplat(N1C) && + isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() && + "Should be a comparison with 0."); + assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + "Valid only for [in]equality 
comparisons."); + + unsigned NewShiftOpcode; + SDValue X, C, Y; + + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Look for '(C l>>/<< Y)'. + auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) { + // The shift should be one-use. + if (!V.hasOneUse()) + return false; + unsigned OldShiftOpcode = V.getOpcode(); + switch (OldShiftOpcode) { + case ISD::SHL: + NewShiftOpcode = ISD::SRL; + break; + case ISD::SRL: + NewShiftOpcode = ISD::SHL; + break; + default: + return false; // must be a logical shift. + } + // We should be shifting a constant. + // FIXME: best to use isConstantOrConstantVector(). + C = V.getOperand(0); + ConstantSDNode *CC = + isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true); + if (!CC) + return false; + Y = V.getOperand(1); + + ConstantSDNode *XC = + isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true); + return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG); + }; + + // LHS of comparison should be an one-use 'and'. + if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) + return SDValue(); + + X = N0.getOperand(0); + SDValue Mask = N0.getOperand(1); + + // 'and' is commutative! + if (!Match(Mask)) { + std::swap(X, Mask); + if (!Match(Mask)) + return SDValue(); + } + + EVT VT = X.getValueType(); + + // Produce: + // ((X 'OppositeShiftOpcode' Y) & C) Cond 0 + SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y); + SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C); + SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond); + return T2; +} + /// Try to fold an equality comparison with a {add/sub/xor} binary operation as /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to /// handle the commuted versions of these patterns. @@ -2726,9 +3118,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // (ctpop x) u< 2 -> (x & x-1) == 0 // (ctpop x) u> 1 -> (x & x-1) != 0 if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){ - SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp, - DAG.getConstant(1, dl, CTVT)); - SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub); + SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); + SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); + SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC); } @@ -2852,7 +3244,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0)); APInt bestMask; unsigned bestWidth = 0, bestOffset = 0; - if (!Lod->isVolatile() && Lod->isUnindexed()) { + if (Lod->isSimple() && Lod->isUnindexed()) { unsigned origWidth = N0.getValueSizeInBits(); unsigned maskWidth = origWidth; // We can narrow (e.g.) 16-bit extending loads on 32-bit target to @@ -3178,6 +3570,14 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } + if (Cond == ISD::SETEQ || Cond == ISD::SETNE) { + // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 + if (C1.isNullValue()) + if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift( + VT, N0, N1, Cond, DCI, dl)) + return CC; + } + // If we have "setcc X, C0", check to see if we can shrink the immediate // by changing cc. // TODO: Support this for vectors after legalize ops. 
@@ -3203,33 +3603,35 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Back to non-vector simplifications. // TODO: Can we do these for vector splats? if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const APInt &C1 = N1C->getAPIntValue(); + EVT ShValTy = N0.getValueType(); // Fold bit comparisons when we can. if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && - (VT == N0.getValueType() || - (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) && + (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) && N0.getOpcode() == ISD::AND) { auto &DL = DAG.getDataLayout(); if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, - !DCI.isBeforeLegalize()); + EVT ShiftTy = getShiftAmountTy(ShValTy, DL, !DCI.isBeforeLegalize()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. - if (AndRHS->getAPIntValue().isPowerOf2()) { + unsigned ShCt = AndRHS->getAPIntValue().logBase2(); + if (AndRHS->getAPIntValue().isPowerOf2() && + ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, - DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, - DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl, - ShiftTy))); + DAG.getNode(ISD::SRL, dl, ShValTy, N0, + DAG.getConstant(ShCt, dl, ShiftTy))); } } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) { // (X & 8) == 8 --> (X & 8) >> 3 // Perform the xform if C1 is a single bit. - if (C1.isPowerOf2()) { + unsigned ShCt = C1.logBase2(); + if (C1.isPowerOf2() && + ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, - DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, - DAG.getConstant(C1.logBase2(), dl, - ShiftTy))); + DAG.getNode(ISD::SRL, dl, ShValTy, N0, + DAG.getConstant(ShCt, dl, ShiftTy))); } } } @@ -3452,15 +3854,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } // Fold remainder of division by a constant. - if (N0.getOpcode() == ISD::UREM && N0.hasOneUse() && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) && + N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); // When division is cheap or optimizing for minimum size, // fall through to DIVREM creation by skipping this fold. - if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) - if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl)) - return Folded; + if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) { + if (N0.getOpcode() == ISD::UREM) { + if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl)) + return Folded; + } else if (N0.getOpcode() == ISD::SREM) { + if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl)) + return Folded; + } + } } // Fold away ALL boolean setcc's. 
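The UREM path above dispatches to buildUREMEqFold, whose multiply-and-rotate pattern is defined later in this patch. A standalone spot-check of that pattern, with hand-computed constants for D = 6 at 32 bits (D0 = 3, K = 1, P = inv(3, 2^32) = 0xAAAAAAAB, Q = floor((2^32 - 1) / 6)); illustrative only, not DAG code:

#include <cassert>
#include <cstdint>

static uint32_t rotr32(uint32_t V, unsigned N) {
  return (V >> N) | (V << (32 - N)); // only called with N = 1 here
}

int main() {
  const uint32_t P = 0xAAAAAAABu;      // inv(3, 2^32)
  const uint32_t Q = 0xFFFFFFFFu / 6u; // floor((2^32 - 1) / 6)
  for (uint32_t N : {0u, 5u, 6u, 35u, 36u, 0xFFFFFFFAu})
    assert(((N % 6u) == 0u) == (rotr32(N * P, 1) <= Q));
  return 0;
}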
@@ -3567,15 +3975,17 @@ TargetLowering::getConstraintType(StringRef Constraint) const { if (S == 1) { switch (Constraint[0]) { default: break; - case 'r': return C_RegisterClass; + case 'r': + return C_RegisterClass; case 'm': // memory case 'o': // offsetable case 'V': // not offsetable return C_Memory; - case 'i': // Simple Integer or Relocatable Constant case 'n': // Simple Integer case 'E': // Floating Point Constant case 'F': // Floating Point Constant + return C_Immediate; + case 'i': // Simple Integer or Relocatable Constant case 's': // Relocatable Constant case 'p': // Address. case 'X': // Allow ANY value. @@ -3950,6 +4360,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL, /// Return an integer indicating how general CT is. static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { switch (CT) { + case TargetLowering::C_Immediate: case TargetLowering::C_Other: case TargetLowering::C_Unknown: return 0; @@ -4069,11 +4480,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[i]); - // If this is an 'other' constraint, see if the operand is valid for it. - // For example, on X86 we might have an 'rI' constraint. If the operand - // is an integer in the range [0..31] we want to use I (saving a load - // of a register), otherwise we must use 'r'. - if (CType == TargetLowering::C_Other && Op.getNode()) { + // If this is an 'other' or 'immediate' constraint, see if the operand is + // valid for it. For example, on X86 we might have an 'rI' constraint. If + // the operand is an integer in the range [0..31] we want to use I (saving a + // load of a register), otherwise we must use 'r'. + if ((CType == TargetLowering::C_Other || + CType == TargetLowering::C_Immediate) && Op.getNode()) { assert(OpInfo.Codes[i].size() == 1 && "Unhandled multi-letter 'other' constraint"); std::vector<SDValue> ResultOps; @@ -4455,6 +4867,34 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, return DAG.getSelect(dl, VT, IsOne, N0, Q); } +/// If all values in Values that *don't* match the predicate are the same +/// 'splat' value, then replace all values with that splat value. +/// Else, if AlternativeReplacement was provided, then replace all values that +/// do match predicate with AlternativeReplacement value. +static void +turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values, + std::function<bool(SDValue)> Predicate, + SDValue AlternativeReplacement = SDValue()) { + SDValue Replacement; + // Is there a value for which the Predicate does *NOT* match? What is it? + auto SplatValue = llvm::find_if_not(Values, Predicate); + if (SplatValue != Values.end()) { + // Does Values consist only of SplatValue and values matching Predicate? + if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) { + return Value == *SplatValue || Predicate(Value); + })) // Then we shall replace values matching predicate with SplatValue. + Replacement = *SplatValue; + } + if (!Replacement) { + // Oops, we did not find the "baseline" splat value. + if (!AlternativeReplacement) + return; // Nothing to do. + // Let's replace with the provided value then.
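// (Worked example, hypothetical lanes: Values = {P, 0, P} with
// Predicate = isNullConstant has the consistent splat P, so the '0' lane
// becomes P. For Values = {P, 0, R} no single splat exists, so the '0'
// lane instead takes this AlternativeReplacement, when one was given.)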
+ Replacement = AlternativeReplacement; + } + std::replace_if(Values.begin(), Values.end(), Predicate, Replacement); +} + /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE /// where the divisor is constant and the comparison target is zero, /// return a DAG expression that will generate the same comparison result /// using only multiplications, additions and shifts/rotations. /// Ref: "Hacker's Delight" 10-17. @@ -4482,77 +4922,409 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, DAGCombinerInfo &DCI, const SDLoc &DL, SmallVectorImpl<SDNode *> &Created) const { // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q) - // - D must be constant with D = D0 * 2^K where D0 is odd and D0 != 1 + // - D must be constant, with D = D0 * 2^K where D0 is odd - P is the multiplicative inverse of D0 modulo 2^W - // - Q = floor((2^W - 1) / D0) + // - Q = floor(((2^W) - 1) / D) // where W is the width of the common type of N and D. assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Only applicable for (in)equality comparisons."); + SelectionDAG &DAG = DCI.DAG; + EVT VT = REMNode.getValueType(); + EVT SVT = VT.getScalarType(); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShSVT = ShVT.getScalarType(); // If MUL is unavailable, we cannot proceed in any case. if (!isOperationLegalOrCustom(ISD::MUL, VT)) return SDValue(); - // TODO: Add non-uniform constant support. - ConstantSDNode *Divisor = isConstOrConstSplat(REMNode->getOperand(1)); + // TODO: Could support comparing with non-zero too. ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); - if (!Divisor || !CompTarget || Divisor->isNullValue() || - !CompTarget->isNullValue()) + if (!CompTarget || !CompTarget->isNullValue()) return SDValue(); - const APInt &D = Divisor->getAPIntValue(); + bool HadOneDivisor = false; + bool AllDivisorsAreOnes = true; + bool HadEvenDivisor = false; + bool AllDivisorsArePowerOfTwo = true; + SmallVector<SDValue, 16> PAmts, KAmts, QAmts; + + auto BuildUREMPattern = [&](ConstantSDNode *C) { + // Division by 0 is UB. Leave it to be constant-folded elsewhere. + if (C->isNullValue()) + return false; + + const APInt &D = C->getAPIntValue(); + // If all divisors are ones, we will prefer to avoid the fold. + HadOneDivisor |= D.isOneValue(); + AllDivisorsAreOnes &= D.isOneValue(); + + // Decompose D into D0 * 2^K + unsigned K = D.countTrailingZeros(); + assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate."); + APInt D0 = D.lshr(K); + + // D is even if it has trailing zeros. + HadEvenDivisor |= (K != 0); + // D is a power-of-two if D0 is one. + // If all divisors are power-of-two, we will prefer to avoid the fold. + AllDivisorsArePowerOfTwo &= D0.isOneValue(); + + // P = inv(D0, 2^W) + // 2^W requires W + 1 bits, so we have to extend and then truncate. + unsigned W = D.getBitWidth(); + APInt P = D0.zext(W + 1) + .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) + .trunc(W); + assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable + assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + + // Q = floor((2^W - 1) / D) + APInt Q = APInt::getAllOnesValue(W).udiv(D); + + assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && + "We are expecting that K is always less than all-ones for ShSVT"); + + // If the divisor is 1 the result can be constant-folded. + if (D.isOneValue()) { + // Set P and K to bogus values so we can try to splat them.
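// (For such lanes any P and K would do: x urem 1 is always zero and Q is
// all-ones there, so the final unsigned compare holds regardless; the
// splat attempts below are then free to overwrite these placeholders.)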
+ P = 0; + K = -1; + assert(Q.isAllOnesValue() && + "Expecting all-ones comparison for one divisor"); + } + + PAmts.push_back(DAG.getConstant(P, DL, SVT)); + KAmts.push_back( + DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT)); + QAmts.push_back(DAG.getConstant(Q, DL, SVT)); + return true; + }; + + SDValue N = REMNode.getOperand(0); + SDValue D = REMNode.getOperand(1) - // Collect the values from each element. + if (!ISD::matchUnaryPredicate(D, BuildUREMPattern)) + return SDValue(); - // If this is a urem by one, avoid the fold since it can be constant-folded. + if (AllDivisorsAreOnes) + return SDValue(); - // If this is a urem by a power of two, avoid the fold since it can be + // best implemented as a bit test. + if (AllDivisorsArePowerOfTwo) + return SDValue(); - // P = inv(D0, 2^W) - // 2^W requires W + 1 bits, so we have to extend and then truncate. - unsigned W = D.getBitWidth(); - APInt P = D0.zext(W + 1) - .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) - .trunc(W); - assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable - assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + SDValue PVal, KVal, QVal; + if (VT.isVector()) { + if (HadOneDivisor) { + // Try to turn PAmts into a splat, since we don't care about the values + // that are currently '0'. If we can't, just keep '0's. + turnVectorIntoSplatVector(PAmts, isNullConstant); + // Try to turn KAmts into a splat, since we don't care about the values + // that are currently '-1'. If we can't, change them to '0's. + turnVectorIntoSplatVector(KAmts, isAllOnesConstant, + DAG.getConstant(0, DL, ShSVT)); + } - // Q = floor((2^W - 1) / D) - APInt Q = APInt::getAllOnesValue(W).udiv(D); + PVal = DAG.getBuildVector(VT, DL, PAmts); + KVal = DAG.getBuildVector(ShVT, DL, KAmts); + QVal = DAG.getBuildVector(VT, DL, QAmts); + } else { + PVal = PAmts[0]; + KVal = KAmts[0]; + QVal = QAmts[0]; + } - SelectionDAG &DAG = DCI.DAG; // (mul N, P) - SDValue Op1 = DAG.getNode(ISD::MUL, DL, VT, REMNode->getOperand(0), PVal); - Created.push_back(Op1.getNode()); + SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal); + Created.push_back(Op0.getNode()); - // Rotate right only if D was even. - if (DivisorIsEven) { + // Rotate right only if any divisor was even. We avoid rotates for all-odd + // divisors as a performance improvement, since rotating by 0 is a no-op. + if (HadEvenDivisor) { // We need ROTR to do this. if (!isOperationLegalOrCustom(ISD::ROTR, VT)) return SDValue(); - SDValue ShAmt = - DAG.getConstant(K, DL, getShiftAmountTy(VT, DAG.getDataLayout())); SDNodeFlags Flags; Flags.setExact(true); // UREM: (rotr (mul N, P), K) - Op1 = DAG.getNode(ISD::ROTR, DL, VT, Op1, ShAmt, Flags); - Created.push_back(Op1.getNode()); + Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags); + Created.push_back(Op0.getNode()); } // UREM: (setule/setugt (rotr (mul N, P), K), Q) - return DAG.getSetCC(DL, SETCCVT, Op1, QVal, + return DAG.getSetCC(DL, SETCCVT, Op0, QVal, ((Cond == ISD::SETEQ) ?
ISD::SETULE : ISD::SETUGT)); } +/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE +/// where the divisor is constant and the comparison target is zero, +/// return a DAG expression that will generate the same comparison result +/// using only multiplications, additions and shifts/rotations. +/// Ref: "Hacker's Delight" 10-17. +SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode, + SDValue CompTargetNode, + ISD::CondCode Cond, + DAGCombinerInfo &DCI, + const SDLoc &DL) const { + SmallVector<SDNode *, 7> Built; + if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond, + DCI, DL, Built)) { + assert(Built.size() <= 7 && "Max size prediction failed."); + for (SDNode *N : Built) + DCI.AddToWorklist(N); + return Folded; + } + + return SDValue(); +} + +SDValue +TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, + SDValue CompTargetNode, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL, + SmallVectorImpl<SDNode *> &Created) const { + // Fold: + // (seteq/ne (srem N, D), 0) + // To: + // (setule/ugt (rotr (add (mul N, P), A), K), Q) + // + // - D must be constant, with D = D0 * 2^K where D0 is odd + // - P is the multiplicative inverse of D0 modulo 2^W + // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^K))) + // - Q = floor((2 * A) / (2^K)) + // where W is the width of the common type of N and D. + assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + "Only applicable for (in)equality comparisons."); + + SelectionDAG &DAG = DCI.DAG; + + EVT VT = REMNode.getValueType(); + EVT SVT = VT.getScalarType(); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShSVT = ShVT.getScalarType(); + + // If MUL is unavailable, we cannot proceed in any case. + if (!isOperationLegalOrCustom(ISD::MUL, VT)) + return SDValue(); + + // TODO: Could support comparing with non-zero too. + ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); + if (!CompTarget || !CompTarget->isNullValue()) + return SDValue(); + + bool HadIntMinDivisor = false; + bool HadOneDivisor = false; + bool AllDivisorsAreOnes = true; + bool HadEvenDivisor = false; + bool NeedToApplyOffset = false; + bool AllDivisorsArePowerOfTwo = true; + SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts; + + auto BuildSREMPattern = [&](ConstantSDNode *C) { + // Division by 0 is UB. Leave it to be constant-folded elsewhere. + if (C->isNullValue()) + return false; + + // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine. + + // WARNING: this fold is only valid for positive divisors! + APInt D = C->getAPIntValue(); + if (D.isNegative()) + D.negate(); // `rem %X, -C` is equivalent to `rem %X, C` + + HadIntMinDivisor |= D.isMinSignedValue(); + + // If all divisors are ones, we will prefer to avoid the fold. + HadOneDivisor |= D.isOneValue(); + AllDivisorsAreOnes &= D.isOneValue(); + + // Decompose D into D0 * 2^K + unsigned K = D.countTrailingZeros(); + assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate."); + APInt D0 = D.lshr(K); + + if (!D.isMinSignedValue()) { + // D is even if it has trailing zeros; unless it's INT_MIN, in which case + // we don't care about this lane in this fold; we'll special-handle it. + HadEvenDivisor |= (K != 0); + } + + // D is a power-of-two if D0 is one. This includes INT_MIN. + // If all divisors are power-of-two, we will prefer to avoid the fold. + AllDivisorsArePowerOfTwo &= D0.isOneValue(); + + // P = inv(D0, 2^W) + // 2^W requires W + 1 bits, so we have to extend and then truncate.
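// (Illustrative numbers, not from the patch: for W = 32 and D = 6 we get
// D0 = 3, K = 1, and P = inv(3, 2^32) = 0xAAAAAAAB, since
// 3 * 0xAAAAAAAB = 0x200000001 == 1 (mod 2^32).)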
+ unsigned W = D.getBitWidth(); + APInt P = D0.zext(W + 1) + .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) + .trunc(W); + assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable + assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + + // A = floor((2^(W - 1) - 1) / D0) & -2^K + APInt A = APInt::getSignedMaxValue(W).udiv(D0); + A.clearLowBits(K); + + if (!D.isMinSignedValue()) { + // If the divisor is INT_MIN, then we don't care about this lane in this + // fold; we'll special-handle it. + NeedToApplyOffset |= A != 0; + } + + // Q = floor((2 * A) / (2^K)) + APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K)); + + assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) && + "We are expecting that A is always less than all-ones for SVT"); + assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && + "We are expecting that K is always less than all-ones for ShSVT"); + + // If the divisor is 1 the result can be constant-folded. Likewise, we + // don't care about INT_MIN lanes; those can be set to undef if appropriate. + if (D.isOneValue()) { + // Set P, A and K to bogus values so we can try to splat them. + P = 0; + A = -1; + K = -1; + + // x ?% 1 == 0 <--> true <--> x u<= -1 + Q = -1; + } + + PAmts.push_back(DAG.getConstant(P, DL, SVT)); + AAmts.push_back(DAG.getConstant(A, DL, SVT)); + KAmts.push_back( + DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT)); + QAmts.push_back(DAG.getConstant(Q, DL, SVT)); + return true; + }; + + SDValue N = REMNode.getOperand(0); + SDValue D = REMNode.getOperand(1); + + // Collect the values from each element. + if (!ISD::matchUnaryPredicate(D, BuildSREMPattern)) + return SDValue(); + + // If this is a srem by one, avoid the fold since it can be constant-folded. + if (AllDivisorsAreOnes) + return SDValue(); + + // If this is a srem by a power of two (including INT_MIN), avoid the fold + // since it can be best implemented as a bit test. + if (AllDivisorsArePowerOfTwo) + return SDValue(); + + SDValue PVal, AVal, KVal, QVal; + if (VT.isVector()) { + if (HadOneDivisor) { + // Try to turn PAmts into a splat, since we don't care about the values + // that are currently '0'. If we can't, just keep '0's. + turnVectorIntoSplatVector(PAmts, isNullConstant); + // Try to turn AAmts into a splat, since we don't care about the + // values that are currently '-1'. If we can't, change them to '0's. + turnVectorIntoSplatVector(AAmts, isAllOnesConstant, + DAG.getConstant(0, DL, SVT)); + // Try to turn KAmts into a splat, since we don't care about the values + // that are currently '-1'. If we can't, change them to '0's. + turnVectorIntoSplatVector(KAmts, isAllOnesConstant, + DAG.getConstant(0, DL, ShSVT)); + } + + PVal = DAG.getBuildVector(VT, DL, PAmts); + AVal = DAG.getBuildVector(VT, DL, AAmts); + KVal = DAG.getBuildVector(ShVT, DL, KAmts); + QVal = DAG.getBuildVector(VT, DL, QAmts); + } else { + PVal = PAmts[0]; + AVal = AAmts[0]; + KVal = KAmts[0]; + QVal = QAmts[0]; + } + + // (mul N, P) + SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal); + Created.push_back(Op0.getNode()); + + if (NeedToApplyOffset) { + // We need ADD to do this. + if (!isOperationLegalOrCustom(ISD::ADD, VT)) + return SDValue(); + + // (add (mul N, P), A) + Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal); + Created.push_back(Op0.getNode()); + } + + // Rotate right only if any divisor was even. We avoid rotates for all-odd + // divisors as a performance improvement, since rotating by 0 is a no-op.
+ if (HadEvenDivisor) { + // We need ROTR to do this. + if (!isOperationLegalOrCustom(ISD::ROTR, VT)) + return SDValue(); + SDNodeFlags Flags; + Flags.setExact(true); + // SREM: (rotr (add (mul N, P), A), K) + Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags); + Created.push_back(Op0.getNode()); + } + + // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q) + SDValue Fold = + DAG.getSetCC(DL, SETCCVT, Op0, QVal, + ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT)); + + // If we didn't have lanes with INT_MIN divisor, then we're done. + if (!HadIntMinDivisor) + return Fold; + + // That fold is only valid for positive divisors, which effectively means + // it is invalid for INT_MIN divisors. So if we have such a lane, + // we must fix up the results for said lanes. + assert(VT.isVector() && "Can/should only get here for vectors."); + + if (!isOperationLegalOrCustom(ISD::SETEQ, VT) || + !isOperationLegalOrCustom(ISD::AND, VT) || + !isOperationLegalOrCustom(Cond, VT) || + !isOperationLegalOrCustom(ISD::VSELECT, VT)) + return SDValue(); + + Created.push_back(Fold.getNode()); + + SDValue IntMin = DAG.getConstant( + APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT); + SDValue IntMax = DAG.getConstant( + APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT); + SDValue Zero = + DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT); + + // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded. + SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ); + Created.push_back(DivisorIsIntMin.getNode()); + + // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0 + SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax); + Created.push_back(Masked.getNode()); + SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond); + Created.push_back(MaskedIsZero.getNode()); + + // To produce the final result we need to blend 2 vectors: 'Fold' and + // 'MaskedIsZero'. If the divisor for a channel was *NOT* INT_MIN, we pick + // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is + // constant-folded, select can get lowered to a shuffle with constant mask. + SDValue Blended = + DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold); + + return Blended; +} + bool TargetLowering:: verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { if (!isa<ConstantSDNode>(Op.getOperand(0))) { @@ -4564,6 +5336,246 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { return false; } +char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, bool ForCodeSize, + unsigned Depth) const { + // fneg is removable even if it has multiple uses. + if (Op.getOpcode() == ISD::FNEG) + return 2; + + // Don't allow anything with multiple uses unless we know it is free. + EVT VT = Op.getValueType(); + const SDNodeFlags Flags = Op->getFlags(); + const TargetOptions &Options = DAG.getTarget().Options; + if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND && + isFPExtFree(VT, Op.getOperand(0).getValueType()))) + return 0; + + // Don't recurse exponentially. + if (Depth > SelectionDAG::MaxRecursionDepth) + return 0; + + switch (Op.getOpcode()) { + case ISD::ConstantFP: { + if (!LegalOperations) + return 1; + + // Don't invert constant FP values after legalization unless the target says + // the negated constant is legal.
+ return isOperationLegal(ISD::ConstantFP, VT) || + isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT, + ForCodeSize); + } + case ISD::BUILD_VECTOR: { + // Only permit BUILD_VECTOR of constants. + if (llvm::any_of(Op->op_values(), [&](SDValue N) { + return !N.isUndef() && !isa<ConstantFPSDNode>(N); + })) + return 0; + if (!LegalOperations) + return 1; + if (isOperationLegal(ISD::ConstantFP, VT) && + isOperationLegal(ISD::BUILD_VECTOR, VT)) + return 1; + return llvm::all_of(Op->op_values(), [&](SDValue N) { + return N.isUndef() || + isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT, + ForCodeSize); + }); + } + case ISD::FADD: + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + return 0; + + // After operation legalization, it might not be legal to create new FSUBs. + if (LegalOperations && !isOperationLegalOrCustom(ISD::FSUB, VT)) + return 0; + + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) + if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1)) + return V; + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + case ISD::FSUB: + // We can't turn -(A-B) into B-A when we honor signed zeros. + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + return 0; + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return 1; + + case ISD::FMUL: + case ISD::FDIV: + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) + if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1)) + return V; + + // Ignore X * 2.0 because that is expected to be canonicalized to X + X. + if (auto *C = isConstOrConstSplatFP(Op.getOperand(1))) + if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL) + return 0; + + return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + + case ISD::FMA: + case ISD::FMAD: { + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + return 0; + + // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) + // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) + char V2 = isNegatibleForFree(Op.getOperand(2), DAG, LegalOperations, + ForCodeSize, Depth + 1); + if (!V2) + return 0; + + // One of Op0/Op1 must be cheaply negatible, then select the cheapest. + char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1); + char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + char V01 = std::max(V0, V1); + return V01 ? std::max(V01, V2) : 0; + } + + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FSIN: + return isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1); + } + + return 0; +} + +SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, + bool ForCodeSize, + unsigned Depth) const { + // fneg is removable even if it has multiple uses. 
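// (In other words, negating an existing fneg simply peels it off:
// getNegatedExpression(fneg X) is X itself, which is why the use count of
// the fneg does not matter for this case.)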
+ if (Op.getOpcode() == ISD::FNEG) + return Op.getOperand(0); + + assert(Depth <= SelectionDAG::MaxRecursionDepth && + "getNegatedExpression doesn't match isNegatibleForFree"); + const SDNodeFlags Flags = Op->getFlags(); + + switch (Op.getOpcode()) { + case ISD::ConstantFP: { + APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); + V.changeSign(); + return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); + } + case ISD::BUILD_VECTOR: { + SmallVector<SDValue, 4> Ops; + for (SDValue C : Op->op_values()) { + if (C.isUndef()) { + Ops.push_back(C); + continue; + } + APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF(); + V.changeSign(); + Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType())); + } + return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops); + } + case ISD::FADD: + assert((DAG.getTarget().Options.NoSignedZerosFPMath || + Flags.hasNoSignedZeros()) && + "Expected NSZ fp-flag"); + + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) + if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, + Depth + 1)) + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(1), Flags); + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(1), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(0), Flags); + case ISD::FSUB: + // fold (fneg (fsub 0, B)) -> B + if (ConstantFPSDNode *N0CFP = + isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true)) + if (N0CFP->isZero()) + return Op.getOperand(1); + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(0), Flags); + + case ISD::FMUL: + case ISD::FDIV: + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) + if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, + Depth + 1)) + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(1), Flags); + + // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) + return DAG.getNode( + Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0), + getNegatedExpression(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1), + Flags); + + case ISD::FMA: + case ISD::FMAD: { + assert((DAG.getTarget().Options.NoSignedZerosFPMath || + Flags.hasNoSignedZeros()) && + "Expected NSZ fp-flag"); + + SDValue Neg2 = getNegatedExpression(Op.getOperand(2), DAG, LegalOperations, + ForCodeSize, Depth + 1); + + char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1); + char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + if (V0 >= V1) { + // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) + SDValue Neg0 = getNegatedExpression( + Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0, + Op.getOperand(1), Neg2, Flags); + } + + // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) + SDValue Neg1 = getNegatedExpression(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + Op.getOperand(0), Neg1, Neg2, Flags); + } + + case ISD::FP_EXTEND: + case ISD::FSIN: + return 
DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1)); + case ISD::FP_ROUND: + return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(1)); + } + + llvm_unreachable("Unknown code"); +} + //===----------------------------------------------------------------------===// // Legalization Utilities //===----------------------------------------------------------------------===// @@ -4862,7 +5874,8 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result, bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const { - SDValue Src = Node->getOperand(0); + unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; + SDValue Src = Node->getOperand(OpNo); EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); SDLoc dl(SDValue(Node, 0)); @@ -4871,6 +5884,13 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, if (SrcVT != MVT::f32 || DstVT != MVT::i64) return false; + if (Node->isStrictFPOpcode()) + // When a NaN is converted to an integer a trap is allowed. We can't + // use this expansion here because it would eliminate that trap. Other + // traps are also allowed and cannot be eliminated. See + // IEEE 754-2008 sec 5.8. + return false; + // Expand f32 -> i64 conversion // This algorithm comes from compiler-rt's implementation of fixsfdi: // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c @@ -4924,9 +5944,11 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, } bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, + SDValue &Chain, SelectionDAG &DAG) const { SDLoc dl(SDValue(Node, 0)); - SDValue Src = Node->getOperand(0); + unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; + SDValue Src = Node->getOperand(OpNo); EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); @@ -4934,7 +5956,9 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); // Only expand vector types if we have the appropriate vector bit operations. - if (DstVT.isVector() && (!isOperationLegalOrCustom(ISD::FP_TO_SINT, DstVT) || + unsigned SIntOpcode = Node->isStrictFPOpcode() ? 
ISD::STRICT_FP_TO_SINT : + ISD::FP_TO_SINT; + if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) || !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT))) return false; @@ -4946,14 +5970,21 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits()); if (APFloat::opOverflow & APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) { - Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src); + if (Node->isStrictFPOpcode()) { + Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, + { Node->getOperand(0), Src }); + Chain = Result.getValue(1); + } else + Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src); return true; } SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT); - bool Strict = shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false); + bool Strict = Node->isStrictFPOpcode() || + shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false); + if (Strict) { // Expand based on maximum range of FP_TO_SINT, if the value exceeds the // signmask then offset (the result of which should be fully representable). @@ -4963,12 +5994,23 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, // Result = fp_to_sint(Val) ^ Ofs // TODO: Should any fast-math-flags be set for the FSUB? - SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, - DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst)); + SDValue SrcBiased; + if (Node->isStrictFPOpcode()) + SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, + { Node->getOperand(0), Src, Cst }); + else + SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst); + SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased); SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), DAG.getConstant(SignMask, dl, DstVT)); - Result = DAG.getNode(ISD::XOR, dl, DstVT, - DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val), Ofs); + SDValue SInt; + if (Node->isStrictFPOpcode()) { + SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, + { SrcBiased.getValue(1), Val }); + Chain = SInt.getValue(1); + } else + SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val); + Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs); } else { // Expand based on maximum range of FP_TO_SINT: // True = fp_to_sint(Src) @@ -5918,7 +6960,8 @@ SDValue TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { assert((Node->getOpcode() == ISD::SMULFIX || Node->getOpcode() == ISD::UMULFIX || - Node->getOpcode() == ISD::SMULFIXSAT) && + Node->getOpcode() == ISD::SMULFIXSAT || + Node->getOpcode() == ISD::UMULFIXSAT) && "Expected a fixed point multiplication opcode"); SDLoc dl(Node); @@ -5926,15 +6969,19 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { SDValue RHS = Node->getOperand(1); EVT VT = LHS.getValueType(); unsigned Scale = Node->getConstantOperandVal(2); - bool Saturating = Node->getOpcode() == ISD::SMULFIXSAT; + bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT || + Node->getOpcode() == ISD::UMULFIXSAT); + bool Signed = (Node->getOpcode() == ISD::SMULFIX || + Node->getOpcode() == ISD::SMULFIXSAT); EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); unsigned VTSize = VT.getScalarSizeInBits(); if (!Scale) { // [us]mul.fix(a, b, 0) -> mul(a, b) - if (!Saturating && isOperationLegalOrCustom(ISD::MUL, VT)) { - return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); - } else if (Saturating && 
@@ -5918,7 +6960,8 @@ SDValue
 TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
   assert((Node->getOpcode() == ISD::SMULFIX ||
           Node->getOpcode() == ISD::UMULFIX ||
-          Node->getOpcode() == ISD::SMULFIXSAT) &&
+          Node->getOpcode() == ISD::SMULFIXSAT ||
+          Node->getOpcode() == ISD::UMULFIXSAT) &&
          "Expected a fixed point multiplication opcode");
 
   SDLoc dl(Node);
@@ -5926,15 +6969,19 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
   SDValue RHS = Node->getOperand(1);
   EVT VT = LHS.getValueType();
   unsigned Scale = Node->getConstantOperandVal(2);
-  bool Saturating = Node->getOpcode() == ISD::SMULFIXSAT;
+  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
+                     Node->getOpcode() == ISD::UMULFIXSAT);
+  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
+                 Node->getOpcode() == ISD::SMULFIXSAT);
   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
   unsigned VTSize = VT.getScalarSizeInBits();
 
   if (!Scale) {
     // [us]mul.fix(a, b, 0) -> mul(a, b)
-    if (!Saturating && isOperationLegalOrCustom(ISD::MUL, VT)) {
-      return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
-    } else if (Saturating && isOperationLegalOrCustom(ISD::SMULO, VT)) {
+    if (!Saturating) {
+      if (isOperationLegalOrCustom(ISD::MUL, VT))
+        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
       SDValue Result =
           DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
       SDValue Product = Result.getValue(0);
@@ -5948,11 +6995,18 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
       SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
       Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
       return DAG.getSelect(dl, VT, Overflow, Result, Product);
+    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
+      SDValue Result =
+          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+      SDValue Product = Result.getValue(0);
+      SDValue Overflow = Result.getValue(1);
+
+      APInt MaxVal = APInt::getMaxValue(VTSize);
+      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
     }
   }
 
-  bool Signed =
-      Node->getOpcode() == ISD::SMULFIX || Node->getOpcode() == ISD::SMULFIXSAT;
   assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
          "Expected scale to be less than the number of bits if signed or at "
          "most the number of bits if unsigned.");
@@ -5978,7 +7032,8 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
 
   if (Scale == VTSize)
     // Result is just the top half since we'd be shifting by the width of the
-    // operand.
+    // operand. Overflow is impossible, so this works for both UMULFIX and
+    // UMULFIXSAT.
    return Hi;
 
   // The result will need to be shifted right by the scale since both operands
@@ -5990,20 +7045,55 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
   if (!Saturating)
     return Result;
 
-  unsigned OverflowBits = VTSize - Scale + 1; // +1 for the sign
-  SDValue HiMask =
-      DAG.getConstant(APInt::getHighBitsSet(VTSize, OverflowBits), dl, VT);
-  SDValue LoMask = DAG.getConstant(
-      APInt::getLowBitsSet(VTSize, VTSize - OverflowBits), dl, VT);
-  APInt MaxVal = APInt::getSignedMaxValue(VTSize);
-  APInt MinVal = APInt::getSignedMinValue(VTSize);
-
-  Result = DAG.getSelectCC(dl, Hi, LoMask,
-                           DAG.getConstant(MaxVal, dl, VT), Result,
-                           ISD::SETGT);
-  return DAG.getSelectCC(dl, Hi, HiMask,
-                         DAG.getConstant(MinVal, dl, VT), Result,
-                         ISD::SETLT);
+  if (!Signed) {
+    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
+    // widened multiplication) aren't all zeroes.
+
+    // Saturate to max if ((Hi >> Scale) != 0),
+    // which is the same as if (Hi > ((1 << Scale) - 1))
+    APInt MaxVal = APInt::getMaxValue(VTSize);
+    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
+                                      dl, VT);
+    Result = DAG.getSelectCC(dl, Hi, LowMask,
+                             DAG.getConstant(MaxVal, dl, VT), Result,
+                             ISD::SETUGT);
+
+    return Result;
+  }
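
The unsigned saturation test above has a direct scalar analogue. A sketch for a 32-bit umul.fix.sat, assuming a 64-bit widened product in place of the Hi/Lo node pair (the function and its name are ours, not LLVM's):

  #include <cstdint>

  uint32_t umulfixsat32(uint32_t A, uint32_t B, unsigned Scale) {
    uint64_t Wide = (uint64_t)A * B;            // Hi:Lo as one 64-bit value
    uint32_t Hi = (uint32_t)(Wide >> 32);
    uint32_t Result = (uint32_t)(Wide >> Scale);
    // Same test as the SETUGT against LowMask: saturate when Hi > 2^Scale - 1.
    if (Scale < 32 && (Hi >> Scale) != 0)
      return UINT32_MAX;
    return Result;                              // Scale == 32 yields Hi; no overflow
  }
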
+  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
+  // widened multiplication) aren't all ones or all zeroes.
+
+  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
+  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
+
+  if (Scale == 0) {
+    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
+                               DAG.getConstant(VTSize - 1, dl, ShiftTy));
+    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
+    // Saturate to SatMin if the wide product is negative, and to SatMax if
+    // the wide product is positive ...
+    SDValue Zero = DAG.getConstant(0, dl, VT);
+    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
+                                               ISD::SETLT);
+    // ... but only if we overflowed.
+    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
+  }
+
+  // We handled Scale == 0 above, so all the bits to examine are in Hi.
+
+  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
+  // which is the same as if (Hi > ((1 << (Scale - 1)) - 1))
+  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
+                                    dl, VT);
+  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
+  // Saturate to min if ((Hi >> (Scale - 1)) < -1),
+  // which is the same as if (Hi < (-1 << (Scale - 1)))
+  SDValue HighMask =
+      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
+                      dl, VT);
+  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
+  return Result;
 }
 
 void TargetLowering::expandUADDSUBO(
@@ -6060,24 +7150,19 @@ void TargetLowering::expandSADDSUBO(
 
   SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
 
-  //   LHSSign -> LHS >= 0
-  //   RHSSign -> RHS >= 0
-  //   SumSign -> Result >= 0
-  //
-  //   Add:
-  //   Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
-  //   Sub:
-  //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
-  SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
-  SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
-  SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
-                                    IsAdd ? ISD::SETEQ : ISD::SETNE);
-
-  SDValue SumSign = DAG.getSetCC(dl, OType, Result, Zero, ISD::SETGE);
-  SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
-
-  SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
-  Overflow = DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType);
+  // For an addition, the result should be less than one of the operands (LHS)
+  // if and only if the other operand (RHS) is negative, otherwise there will
+  // be overflow.
+  // For a subtraction, the result should be less than one of the operands
+  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
+  // otherwise there will be overflow.
+  SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
+  SDValue ConditionRHS =
+      DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
+
+  Overflow = DAG.getBoolExtOrTrunc(
+      DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
+      ResultType, ResultType);
 }
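
The rewritten check needs two setccs and an XOR where the old code built five setccs and an AND. A scalar model of the new condition, in our own words rather than LLVM code (the unsigned casts give wrapping arithmetic; the conversion back to int32_t is two's complement on common implementations and well defined since C++20):

  #include <cstdint>

  // Signed add: overflow <=> (Result < LHS) != (RHS < 0).
  bool saddOverflows(int32_t LHS, int32_t RHS, int32_t &Result) {
    Result = (int32_t)((uint32_t)LHS + (uint32_t)RHS); // wrapping add
    return (Result < LHS) != (RHS < 0);
  }

  // Signed sub: overflow <=> (Result < LHS) != (RHS > 0).
  bool ssubOverflows(int32_t LHS, int32_t RHS, int32_t &Result) {
    Result = (int32_t)((uint32_t)LHS - (uint32_t)RHS); // wrapping sub
    return (Result < LHS) != (RHS > 0);
  }
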
 
 bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
@@ -6176,20 +7261,19 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
     // being a legal type for the architecture and thus has to be split to
     // two arguments.
     SDValue Ret;
+    TargetLowering::MakeLibCallOptions CallOptions;
+    CallOptions.setSExt(isSigned);
+    CallOptions.setIsPostTypeLegalization(true);
     if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
       // Halves of WideVT are packed into registers in different order
       // depending on platform endianness. This is usually handled by
       // the C calling convention, but we can't defer to it in
       // the legalizer.
       SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
-      Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl,
-                        /* doesNotReturn */ false, /* isReturnValueUsed */ true,
-                        /* isPostTypeLegalization */ true).first;
+      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
     } else {
       SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
-      Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl,
-                        /* doesNotReturn */ false, /* isReturnValueUsed */ true,
-                        /* isPostTypeLegalization */ true).first;
+      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
     }
 
     assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
            "Ret value is a collection of constituent nodes holding result.");
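
The two Args orderings mirror how the halves of a wide integer sit in memory. A hypothetical illustration (ours, not LLVM API) of why the halves swap with endianness:

  #include <cstdint>
  #include <cstring>

  // The in-memory order of a 64-bit value's two 32-bit halves follows the
  // platform's endianness: Halves[0] is the low half on little-endian targets
  // and the high half on big-endian ones, matching the two Args orders above.
  void splitArg(uint64_t V, uint32_t Halves[2]) {
    std::memcpy(Halves, &V, sizeof(V));
  }
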