path: root/lib/CodeGen/SelectionDAG
author     Dimitry Andric <dim@FreeBSD.org>  2019-10-23 17:51:42 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2019-10-23 17:51:42 +0000
commit     1d5ae1026e831016fc29fd927877c86af904481f (patch)
tree       2cdfd12620fcfa5d9e4a0389f85368e8e36f63f9 /lib/CodeGen/SelectionDAG
parent     e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff)
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp                  1758
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp                       67
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp            7
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp                   77
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp                   222
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp            430
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp          510
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp                  56
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h                    61
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp           46
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp              50
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp           139
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp                 2
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp              18
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp             18
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h                3
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp                 9
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp                  283
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp     1
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp           495
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h               2
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp              9
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp               35
-rw-r--r--  lib/CodeGen/SelectionDAG/StatepointLowering.cpp             34
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp               1406
25 files changed, 3619 insertions, 2119 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 49c922f560fa..e8950b58d42d 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -24,7 +24,6 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -111,10 +110,20 @@ static cl::opt<bool>
MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
cl::desc("DAG combiner may split indexing from loads"));
+static cl::opt<bool>
+ EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
+ cl::desc("DAG combiner enable merging multiple stores "
+ "into a wider store"));
+
static cl::opt<unsigned> TokenFactorInlineLimit(
"combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
cl::desc("Limit the number of operands to inline for Token Factors"));
+static cl::opt<unsigned> StoreMergeDependenceLimit(
+ "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
+ cl::desc("Limit the number of times for the same StoreNode and RootNode "
+ "to bail out in store merging dependence check"));
+
namespace {
class DAGCombiner {
@@ -152,6 +161,14 @@ namespace {
/// which have not yet been combined to the worklist.
SmallPtrSet<SDNode *, 32> CombinedNodes;
+ /// Map from candidate StoreNode to the pair of RootNode and count.
+ /// The count is used to track how many times we have seen the StoreNode
+ /// with the same RootNode bail out in dependence check. If we have seen
+ /// the bail out for the same pair many times over a limit, we won't
+ /// consider the StoreNode with the same RootNode as store merging
+ /// candidate again.
+ DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
+
// AA - Used for DAG load/store alias analysis.
AliasAnalysis *AA;
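
The new map supports a simple throttling scheme: every time the store-merging
dependence check bails out for a given (StoreNode, RootNode) pair, a counter is
bumped, and once it crosses StoreMergeDependenceLimit that pair is skipped. A
hypothetical, simplified sketch of the lookup side (not the patch's actual
helper; names are illustrative and the usual LLVM ADT/SelectionDAG headers are
assumed):

    // Returns true if this store is still worth considering with this root.
    static bool shouldConsiderForMerge(
        llvm::DenseMap<SDNode *, std::pair<SDNode *, unsigned>> &Counts,
        SDNode *StoreNode, SDNode *RootNode, unsigned Limit) {
      auto &Entry = Counts[StoreNode]; // default-constructed on first use
      if (Entry.first != RootNode) {
        Entry = {RootNode, 0};         // new root: reset the bail-out count
        return true;
      }
      // Entry.second is incremented elsewhere on every dependence-check bail.
      return Entry.second < Limit;
    }
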
@@ -236,6 +253,7 @@ namespace {
void removeFromWorklist(SDNode *N) {
CombinedNodes.erase(N);
PruningList.remove(N);
+ StoreRootCountMap.erase(N);
auto It = WorklistMap.find(N);
if (It == WorklistMap.end())
@@ -361,6 +379,7 @@ namespace {
SDValue visitSUBE(SDNode *N);
SDValue visitSUBCARRY(SDNode *N);
SDValue visitMUL(SDNode *N);
+ SDValue visitMULFIX(SDNode *N);
SDValue useDivRem(SDNode *N);
SDValue visitSDIV(SDNode *N);
SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
@@ -421,7 +440,6 @@ namespace {
SDValue visitFP_TO_SINT(SDNode *N);
SDValue visitFP_TO_UINT(SDNode *N);
SDValue visitFP_ROUND(SDNode *N);
- SDValue visitFP_ROUND_INREG(SDNode *N);
SDValue visitFP_EXTEND(SDNode *N);
SDValue visitFNEG(SDNode *N);
SDValue visitFABS(SDNode *N);
@@ -470,7 +488,7 @@ namespace {
SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1, SDNodeFlags Flags);
- SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
+ SDValue visitShiftByConstant(SDNode *N);
SDValue foldSelectOfConstants(SDNode *N);
SDValue foldVSelectOfConstants(SDNode *N);
@@ -497,6 +515,7 @@ namespace {
bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
SDValue &CC) const;
bool isOneUseSetCC(SDValue N) const;
+ bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y);
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
@@ -510,7 +529,7 @@ namespace {
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
- SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
+ SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
@@ -521,11 +540,11 @@ namespace {
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
- SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
+ SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
SDValue InnerPos, SDValue InnerNeg,
unsigned PosOpcode, unsigned NegOpcode,
const SDLoc &DL);
- SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
+ SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
SDValue MatchLoadCombine(SDNode *N);
SDValue MatchStoreCombine(StoreSDNode *N);
SDValue ReduceLoadWidth(SDNode *N);
@@ -742,6 +761,11 @@ CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}
+bool TargetLowering::DAGCombinerInfo::
+recursivelyDeleteUnusedNodes(SDNode *N) {
+ return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
+}
+
void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
@@ -766,195 +790,6 @@ void DAGCombiner::deleteAndRecombine(SDNode *N) {
DAG.DeleteNode(N);
}
-/// Return 1 if we can compute the negated form of the specified expression for
-/// the same cost as the expression itself, or 2 if we can compute the negated
-/// form more cheaply than the expression itself.
-static char isNegatibleForFree(SDValue Op, bool LegalOperations,
- const TargetLowering &TLI,
- const TargetOptions *Options,
- bool ForCodeSize,
- unsigned Depth = 0) {
- // fneg is removable even if it has multiple uses.
- if (Op.getOpcode() == ISD::FNEG)
- return 2;
-
- // Don't allow anything with multiple uses unless we know it is free.
- EVT VT = Op.getValueType();
- const SDNodeFlags Flags = Op->getFlags();
- if (!Op.hasOneUse() &&
- !(Op.getOpcode() == ISD::FP_EXTEND &&
- TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
- return 0;
-
- // Don't recurse exponentially.
- if (Depth > 6)
- return 0;
-
- switch (Op.getOpcode()) {
- default: return false;
- case ISD::ConstantFP: {
- if (!LegalOperations)
- return 1;
-
- // Don't invert constant FP values after legalization unless the target says
- // the negated constant is legal.
- return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
- TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
- ForCodeSize);
- }
- case ISD::BUILD_VECTOR: {
- // Only permit BUILD_VECTOR of constants.
- if (llvm::any_of(Op->op_values(), [&](SDValue N) {
- return !N.isUndef() && !isa<ConstantFPSDNode>(N);
- }))
- return 0;
- if (!LegalOperations)
- return 1;
- if (TLI.isOperationLegal(ISD::ConstantFP, VT) &&
- TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
- return 1;
- return llvm::all_of(Op->op_values(), [&](SDValue N) {
- return N.isUndef() ||
- TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
- ForCodeSize);
- });
- }
- case ISD::FADD:
- if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
- return 0;
-
- // After operation legalization, it might not be legal to create new FSUBs.
- if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
- return 0;
-
- // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
- if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
- Options, ForCodeSize, Depth + 1))
- return V;
- // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
- return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
- ForCodeSize, Depth + 1);
- case ISD::FSUB:
- // We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
- return 0;
-
- // fold (fneg (fsub A, B)) -> (fsub B, A)
- return 1;
-
- case ISD::FMUL:
- case ISD::FDIV:
- // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
- if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
- Options, ForCodeSize, Depth + 1))
- return V;
-
- return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
- ForCodeSize, Depth + 1);
-
- case ISD::FP_EXTEND:
- case ISD::FP_ROUND:
- case ISD::FSIN:
- return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
- ForCodeSize, Depth + 1);
- }
-}
-
-/// If isNegatibleForFree returns true, return the newly negated expression.
-static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
- bool LegalOperations, bool ForCodeSize,
- unsigned Depth = 0) {
- // fneg is removable even if it has multiple uses.
- if (Op.getOpcode() == ISD::FNEG)
- return Op.getOperand(0);
-
- assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
- const TargetOptions &Options = DAG.getTarget().Options;
- const SDNodeFlags Flags = Op->getFlags();
-
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Unknown code");
- case ISD::ConstantFP: {
- APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
- V.changeSign();
- return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
- }
- case ISD::BUILD_VECTOR: {
- SmallVector<SDValue, 4> Ops;
- for (SDValue C : Op->op_values()) {
- if (C.isUndef()) {
- Ops.push_back(C);
- continue;
- }
- APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
- V.changeSign();
- Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
- }
- return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
- }
- case ISD::FADD:
- assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
-
- // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
- if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
- DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
- Depth + 1))
- return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
- GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1),
- Op.getOperand(1), Flags);
- // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
- return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
- GetNegatedExpression(Op.getOperand(1), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1),
- Op.getOperand(0), Flags);
- case ISD::FSUB:
- // fold (fneg (fsub 0, B)) -> B
- if (ConstantFPSDNode *N0CFP =
- isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
- if (N0CFP->isZero())
- return Op.getOperand(1);
-
- // fold (fneg (fsub A, B)) -> (fsub B, A)
- return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
- Op.getOperand(1), Op.getOperand(0), Flags);
-
- case ISD::FMUL:
- case ISD::FDIV:
- // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
- if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
- DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
- Depth + 1))
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
- GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1),
- Op.getOperand(1), Flags);
-
- // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
- Op.getOperand(0),
- GetNegatedExpression(Op.getOperand(1), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1), Flags);
-
- case ISD::FP_EXTEND:
- case ISD::FSIN:
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
- GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1));
- case ISD::FP_ROUND:
- return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
- GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1),
- Op.getOperand(1));
- }
-}
-
// APInts must be the same size for most operations, this helper
// function zero extends the shorter of the pair so that they match.
// We provide an Offset so that we can create bitwidths that won't overflow.
@@ -1124,7 +959,6 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
if (!OpNode.getNode())
return SDValue();
- AddToWorklist(OpNode.getNode());
return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
}
}
@@ -1438,7 +1272,6 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
SDValue RV =
DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
- AddToWorklist(N0.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
@@ -1591,8 +1424,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
for (SDNode *LN : UpdatedNodes) {
- AddToWorklist(LN);
AddUsersToWorklist(LN);
+ AddToWorklist(LN);
}
if (!NIsValid)
continue;
@@ -1673,6 +1506,10 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ADDCARRY: return visitADDCARRY(N);
case ISD::SUBE: return visitSUBE(N);
case ISD::SUBCARRY: return visitSUBCARRY(N);
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: return visitMULFIX(N);
case ISD::MUL: return visitMUL(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
@@ -1736,7 +1573,6 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
case ISD::FP_ROUND: return visitFP_ROUND(N);
- case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
case ISD::FP_EXTEND: return visitFP_EXTEND(N);
case ISD::FNEG: return visitFNEG(N);
case ISD::FABS: return visitFABS(N);
@@ -3308,6 +3144,18 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
}
+ if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
+ // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
+ if (SDValue Carry = getAsCarry(TLI, N0)) {
+ SDValue X = N1;
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
+ return DAG.getNode(ISD::ADDCARRY, DL,
+ DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
+ Carry);
+ }
+ }
+
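
The new fold leans on a modular-arithmetic identity: subtracting X from a 0/1
carry value equals adding the carry into the negation of X, which is exactly
what ADDCARRY computes. A standalone sanity check of the identity (plain C++,
not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      // (sub Carry, X) == (addcarry (sub 0, X), 0, Carry) on i32.
      for (uint32_t Carry = 0; Carry <= 1; ++Carry)
        for (uint32_t X : {0u, 1u, 7u, 0xFFFFFFFFu})
          assert(Carry - X == (0u - X) + 0u + Carry);
    }
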
return SDValue();
}
@@ -3442,6 +3290,30 @@ SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
return SDValue();
}
+// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
+// UMULFIXSAT here.
+SDValue DAGCombiner::visitMULFIX(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue Scale = N->getOperand(2);
+ EVT VT = N0.getValueType();
+
+ // fold (mulfix x, undef, scale) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ // Canonicalize constant to RHS (vector doesn't have to splat)
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
+
+ // fold (mulfix x, 0, scale) -> 0
+ if (isNullConstant(N1))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ return SDValue();
+}
+
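
For context on the new visitor: the fixed-point multiply nodes compute roughly
(X * Y) >> Scale, so a zero operand yields zero for any scale, which justifies
both zero folds above. A standalone sketch of the signed, non-saturating
semantics (simplified; the real SMULFIX rounding rules are more carefully
specified):

    #include <cassert>
    #include <cstdint>

    // Simplified model of ISD::SMULFIX on i32: widen, multiply, shift.
    static int32_t smulfix(int32_t X, int32_t Y, unsigned Scale) {
      return static_cast<int32_t>((static_cast<int64_t>(X) * Y) >> Scale);
    }

    int main() {
      for (unsigned Scale : {0u, 8u, 16u})
        assert(smulfix(12345, 0, Scale) == 0); // (mulfix x, 0, scale) -> 0
    }
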
SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -3537,7 +3409,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// x * 15 --> (x << 4) - x
// x * -33 --> -((x << 5) + x)
// x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
- if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
+ if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
// TODO: We could handle more general decomposition of any constant by
// having the target set a limit on number of ops and making a
// callback to determine that sequence (similar to sqrt expansion).
@@ -4083,10 +3955,10 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
if (VT.isVector()) {
// fold (mulhs x, 0) -> 0
- if (ISD::isBuildVectorAllZeros(N1.getNode()))
- return N1;
- if (ISD::isBuildVectorAllZeros(N0.getNode()))
- return N0;
+ // Do not return N0/N1, because an undef node may exist.
+ if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
+ ISD::isBuildVectorAllZeros(N1.getNode()))
+ return DAG.getConstant(0, DL, VT);
}
// fold (mulhs x, 0) -> 0
@@ -4095,7 +3967,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
// fold (mulhs x, 1) -> (sra x, size(x)-1)
if (isOneConstant(N1))
return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
- DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
+ DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
// fold (mulhs x, undef) -> 0
@@ -4130,10 +4002,10 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
if (VT.isVector()) {
// fold (mulhu x, 0) -> 0
- if (ISD::isBuildVectorAllZeros(N1.getNode()))
- return N1;
- if (ISD::isBuildVectorAllZeros(N0.getNode()))
- return N0;
+ // Do not return N0/N1, because an undef node may exist.
+ if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
+ ISD::isBuildVectorAllZeros(N1.getNode()))
+ return DAG.getConstant(0, DL, VT);
}
// fold (mulhu x, 0) -> 0
@@ -4265,6 +4137,18 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
+ // (umul_lohi N0, 0) -> (0, 0)
+ if (isNullConstant(N->getOperand(1))) {
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ return CombineTo(N, Zero, Zero);
+ }
+
+ // (umul_lohi N0, 1) -> (N0, 0)
+ if (isOneConstant(N->getOperand(1))) {
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ return CombineTo(N, N->getOperand(0), Zero);
+ }
+
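
UMUL_LOHI produces the low and high halves of the double-width product, so the
two new folds are the multiply-by-zero and multiply-by-one identities applied
to both halves. A standalone check on a 32x32->64 multiply (not part of the
patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t A = 0xDEADBEEFu;
      uint64_t P0 = static_cast<uint64_t>(A) * 0; // (umul_lohi A, 0) -> (0, 0)
      uint64_t P1 = static_cast<uint64_t>(A) * 1; // (umul_lohi A, 1) -> (A, 0)
      assert(static_cast<uint32_t>(P0) == 0 && (P0 >> 32) == 0);
      assert(static_cast<uint32_t>(P1) == A && (P1 >> 32) == 0);
    }
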
// If the type that is twice as wide is legal, transform the mulhu to a wider
// multiply plus a shift.
if (VT.isSimple() && !VT.isVector()) {
@@ -4290,13 +4174,29 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
}
SDValue DAGCombiner::visitMULO(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
bool IsSigned = (ISD::SMULO == N->getOpcode());
+ EVT CarryVT = N->getValueType(1);
+ SDLoc DL(N);
+
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
+
+ // fold (mulo x, 0) -> 0 + no carry out
+ if (isNullOrNullSplat(N1))
+ return CombineTo(N, DAG.getConstant(0, DL, VT),
+ DAG.getConstant(0, DL, CarryVT));
+
// (mulo x, 2) -> (addo x, x)
- if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1)))
+ if (ConstantSDNode *C2 = isConstOrConstSplat(N1))
if (C2->getAPIntValue() == 2)
- return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N),
- N->getVTList(), N->getOperand(0), N->getOperand(0));
+ return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
+ N->getVTList(), N0, N0);
return SDValue();
}
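
The multiply-with-overflow folds can be sanity-checked with the compiler
overflow builtins: doubling a value overflows exactly when adding it to itself
does, and both produce the same wrapped result. A standalone check (assumes
the GCC/Clang __builtin_*_overflow builtins):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t X : {0, 1, 0x40000000, -0x40000000 - 1}) {
        int32_t M, A;
        bool MulOv = __builtin_mul_overflow(X, 2, &M);
        bool AddOv = __builtin_add_overflow(X, X, &A);
        assert(MulOv == AddOv && M == A); // (mulo x, 2) == (addo x, x)
      }
    }
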
@@ -4444,7 +4344,9 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
Level <= AfterLegalizeTypes) {
// Input types must be integer and the same.
- if (XVT.isInteger() && XVT == Y.getValueType()) {
+ if (XVT.isInteger() && XVT == Y.getValueType() &&
+ !(VT.isVector() && TLI.isTypeLegal(VT) &&
+ !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
return DAG.getNode(HandOpcode, DL, VT, Logic);
}
@@ -4770,8 +4672,8 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
return true;
}
- // Do not change the width of a volatile load.
- if (LoadN->isVolatile())
+ // Do not change the width of volatile or atomic loads.
+ if (!LoadN->isSimple())
return false;
// Do not generate loads of non-round integer types since these can
@@ -4803,15 +4705,15 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
if (!MemVT.isRound())
return false;
- // Don't change the width of a volatile load.
- if (LDST->isVolatile())
+ // Don't change the width of volatile or atomic loads.
+ if (!LDST->isSimple())
return false;
// Verify that we are actually reducing a load width here.
if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
return false;
- // Ensure that this isn't going to produce an unsupported unaligned access.
+ // Ensure that this isn't going to produce an unsupported memory access.
if (ShAmt &&
!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
LDST->getAddressSpace(), ShAmt / 8,
@@ -5076,6 +4978,59 @@ SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
return T1;
}
+/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
+/// For a target with a bit test, this is expected to become test + set and save
+/// at least 1 instruction.
+static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
+ assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
+
+ // This is probably not worthwhile without a supported type.
+ EVT VT = And->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
+
+ // Look through an optional extension and find a 'not'.
+ // TODO: Should we favor test+set even without the 'not' op?
+ SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
+ if (Not.getOpcode() == ISD::ANY_EXTEND)
+ Not = Not.getOperand(0);
+ if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
+ return SDValue();
+
+ // Look through an optional truncation. The source operand may not be the same
+ // type as the original 'and', but that is ok because we are masking off
+ // everything but the low bit.
+ SDValue Srl = Not.getOperand(0);
+ if (Srl.getOpcode() == ISD::TRUNCATE)
+ Srl = Srl.getOperand(0);
+
+ // Match a shift-right by constant.
+ if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
+ !isa<ConstantSDNode>(Srl.getOperand(1)))
+ return SDValue();
+
+ // We might have looked through casts that make this transform invalid.
+ // TODO: If the source type is wider than the result type, do the mask and
+ // compare in the source type.
+ const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
+ unsigned VTBitWidth = VT.getSizeInBits();
+ if (ShiftAmt.uge(VTBitWidth))
+ return SDValue();
+
+ // Turn this into a bit-test pattern using mask op + setcc:
+ // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
+ SDLoc DL(And);
+ SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
+ EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Mask = DAG.getConstant(
+ APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
+ return DAG.getZExtOrTrunc(Setcc, DL, VT);
+}
+
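
The rewrite performed by the helper above can be checked directly in scalar
code: masking the inverted, shifted value down to one bit asks whether bit C of
X is clear, which is the mask-and-compare form emitted. A standalone check
(not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      unsigned C = 4;
      for (uint32_t X : {0u, 0x10u, 0xFFFFFFEFu, 0xFFFFFFFFu}) {
        uint32_t Old = (~(X >> C)) & 1u;               // and (not (srl X, C)), 1
        uint32_t New = (X & (1u << C)) == 0 ? 1u : 0u; // (and X, 1<<C) == 0
        assert(Old == New);
      }
    }
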
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5163,6 +5118,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
+
// similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
// (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
// already be zero by virtue of the width of the base type of the load.
@@ -5337,7 +5293,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
unsigned MemBitSize = MemVT.getScalarSizeInBits();
APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
if (DAG.MaskedValueIsZero(N1, ExtBits) &&
- ((!LegalOperations && !LN0->isVolatile()) ||
+ ((!LegalOperations && LN0->isSimple()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
SDValue ExtLoad =
DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
@@ -5358,6 +5314,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
return Shifts;
+ if (TLI.hasBitTest(N0, N1))
+ if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
+ return V;
+
return SDValue();
}
@@ -5564,6 +5524,23 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
return true;
}
+// Match 2 elements of a packed halfword bswap.
+static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
+ if (N.getOpcode() == ISD::OR)
+ return isBSwapHWordElement(N.getOperand(0), Parts) &&
+ isBSwapHWordElement(N.getOperand(1), Parts);
+
+ if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
+ ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
+ if (!C || C->getAPIntValue() != 16)
+ return false;
+ Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
+ return true;
+ }
+
+ return false;
+}
+
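
The new helper recognizes (srl (bswap x), 16) as supplying a whole halfword
pair at once, which works because the packed-halfword byte swap is just bswap
rotated by 16 bits. A standalone check of that equivalence (assumes the
GCC/Clang __builtin_bswap32):

    #include <cassert>
    #include <cstdint>

    static uint32_t hwswap(uint32_t X) { // byte swap within each halfword
      return ((X & 0x000000ff) << 8) | ((X & 0x0000ff00) >> 8) |
             ((X & 0x00ff0000) << 8) | ((X & 0xff000000) >> 8);
    }

    int main() {
      uint32_t X = 0x11223344u;
      uint32_t B = __builtin_bswap32(X);            // 0x44332211
      assert(hwswap(X) == ((B >> 16) | (B << 16))); // 0x22114433
    }
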
/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
@@ -5581,43 +5558,26 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
return SDValue();
// Look for either
- // (or (or (and), (and)), (or (and), (and)))
- // (or (or (or (and), (and)), (and)), (and))
- if (N0.getOpcode() != ISD::OR)
- return SDValue();
- SDValue N00 = N0.getOperand(0);
- SDValue N01 = N0.getOperand(1);
+ // (or (bswaphpair), (bswaphpair))
+ // (or (or (bswaphpair), (and)), (and))
+ // (or (or (and), (bswaphpair)), (and))
SDNode *Parts[4] = {};
- if (N1.getOpcode() == ISD::OR &&
- N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
+ if (isBSwapHWordPair(N0, Parts)) {
// (or (or (and), (and)), (or (and), (and)))
- if (!isBSwapHWordElement(N00, Parts))
+ if (!isBSwapHWordPair(N1, Parts))
return SDValue();
-
- if (!isBSwapHWordElement(N01, Parts))
- return SDValue();
- SDValue N10 = N1.getOperand(0);
- if (!isBSwapHWordElement(N10, Parts))
- return SDValue();
- SDValue N11 = N1.getOperand(1);
- if (!isBSwapHWordElement(N11, Parts))
- return SDValue();
- } else {
+ } else if (N0.getOpcode() == ISD::OR) {
// (or (or (or (and), (and)), (and)), (and))
if (!isBSwapHWordElement(N1, Parts))
return SDValue();
- if (!isBSwapHWordElement(N01, Parts))
- return SDValue();
- if (N00.getOpcode() != ISD::OR)
- return SDValue();
- SDValue N000 = N00.getOperand(0);
- if (!isBSwapHWordElement(N000, Parts))
- return SDValue();
- SDValue N001 = N00.getOperand(1);
- if (!isBSwapHWordElement(N001, Parts))
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
+ !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
return SDValue();
- }
+ } else
+ return SDValue();
// Make sure the parts are all coming from the same node.
if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
@@ -5791,15 +5751,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
- bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
- if (!LegalMask) {
- std::swap(NewLHS, NewRHS);
- ShuffleVectorSDNode::commuteMask(Mask);
- LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
- }
-
- if (LegalMask)
- return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
+ SDValue LegalShuffle =
+ TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
+ Mask, DAG);
+ if (LegalShuffle)
+ return LegalShuffle;
}
}
}
@@ -5867,8 +5823,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return V;
// See if this is some rotate idiom.
- if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
- return SDValue(Rot, 0);
+ if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
+ return Rot;
if (SDValue Load = MatchLoadCombine(N))
return Load;
@@ -5914,6 +5870,9 @@ static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
/// Otherwise, returns an expansion of \p ExtractFrom based on the following
/// patterns:
///
+/// (or (add v v) (shrl v bitwidth-1)):
+/// expands (add v v) -> (shl v 1)
+///
/// (or (mul v c0) (shrl (mul v c1) c2)):
/// expands (mul v c0) -> (shl (mul v c1) c3)
///
@@ -5936,6 +5895,23 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
"Existing shift must be valid as a rotate half");
ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
+
+ // Value and Type of the shift.
+ SDValue OppShiftLHS = OppShift.getOperand(0);
+ EVT ShiftedVT = OppShiftLHS.getValueType();
+
+ // Amount of the existing shift.
+ ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
+
+ // (add v v) -> (shl v 1)
+ if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
+ ExtractFrom.getOpcode() == ISD::ADD &&
+ ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
+ ExtractFrom.getOperand(0) == OppShiftLHS &&
+ OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
+ return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
+ DAG.getShiftAmountConstant(1, ShiftedVT, DL));
+
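
The new (add v v) case is sound because self-addition is a left shift by one,
so or'ing it with the value shifted right by bitwidth-1 forms a
rotate-left-by-1. A standalone check on i32 (not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t V : {1u, 0x80000000u, 0xDEADBEEFu}) {
        uint32_t RotL1 = (V << 1) | (V >> 31);  // rotate left by 1
        assert(((V + V) | (V >> 31)) == RotL1); // (add v v) acts as (shl v 1)
      }
    }
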
// Preconditions:
// (or (op0 v c0) (shiftl/r (op0 v c1) c2))
//
@@ -5959,15 +5935,11 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
// op0 must be the same opcode on both sides, have the same LHS argument,
// and produce the same value type.
- SDValue OppShiftLHS = OppShift.getOperand(0);
- EVT ShiftedVT = OppShiftLHS.getValueType();
if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
ShiftedVT != ExtractFrom.getValueType())
return SDValue();
- // Amount of the existing shift.
- ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
// Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
// Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
@@ -6137,7 +6109,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
// former being preferred if supported. InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
-SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
+SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
SDValue Neg, SDValue InnerPos,
SDValue InnerNeg, unsigned PosOpcode,
unsigned NegOpcode, const SDLoc &DL) {
@@ -6152,32 +6124,33 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
- HasPos ? Pos : Neg).getNode();
+ HasPos ? Pos : Neg);
}
- return nullptr;
+ return SDValue();
}
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
-SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
+SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// Must be a legal type. Expanded 'n promoted things won't work with rotates.
EVT VT = LHS.getValueType();
- if (!TLI.isTypeLegal(VT)) return nullptr;
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
// The target must have at least one rotate flavor.
bool HasROTL = hasOperation(ISD::ROTL, VT);
bool HasROTR = hasOperation(ISD::ROTR, VT);
- if (!HasROTL && !HasROTR) return nullptr;
+ if (!HasROTL && !HasROTR)
+ return SDValue();
// Check for truncated rotate.
if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
assert(LHS.getValueType() == RHS.getValueType());
- if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
- return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
- SDValue(Rot, 0)).getNode();
+ if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
}
}
@@ -6192,7 +6165,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// If neither side matched a rotate half, bail
if (!LHSShift && !RHSShift)
- return nullptr;
+ return SDValue();
// InstCombine may have combined a constant shl, srl, mul, or udiv with one
// side of the rotate, so try to handle that here. In all cases we need to
@@ -6215,15 +6188,15 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// If a side is still missing, nothing else we can do.
if (!RHSShift || !LHSShift)
- return nullptr;
+ return SDValue();
// At this point we've matched or extracted a shift op on each side.
if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
- return nullptr; // Not shifting the same value.
+ return SDValue(); // Not shifting the same value.
if (LHSShift.getOpcode() == RHSShift.getOpcode())
- return nullptr; // Shifts must disagree.
+ return SDValue(); // Shifts must disagree.
// Canonicalize shl to left side in a shl/srl pair.
if (RHSShift.getOpcode() == ISD::SHL) {
@@ -6267,13 +6240,13 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
}
- return Rot.getNode();
+ return Rot;
}
// If there is a mask here, and we have a variable shift, we can't be sure
// that we're masking out the right stuff.
if (LHSMask.getNode() || RHSMask.getNode())
- return nullptr;
+ return SDValue();
// If the shift amount is sign/zext/any-extended just peel it off.
SDValue LExtOp0 = LHSShiftAmt;
@@ -6290,17 +6263,17 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
RExtOp0 = RHSShiftAmt.getOperand(0);
}
- SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
+ SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
if (TryL)
return TryL;
- SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
+ SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
if (TryR)
return TryR;
- return nullptr;
+ return SDValue();
}
namespace {
@@ -6415,7 +6388,7 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
Depth + 1);
case ISD::LOAD: {
auto L = cast<LoadSDNode>(Op.getNode());
- if (L->isVolatile() || L->isIndexed())
+ if (!L->isSimple() || L->isIndexed())
return None;
unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
@@ -6504,8 +6477,9 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
SDValue Chain;
SmallVector<StoreSDNode *, 8> Stores;
for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
+ // TODO: Allow unordered atomics when wider type is legal (see D66309)
if (Store->getMemoryVT() != MVT::i8 ||
- Store->isVolatile() || Store->isIndexed())
+ !Store->isSimple() || Store->isIndexed())
return SDValue();
Stores.push_back(Store);
Chain = Store->getChain();
@@ -6716,7 +6690,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();
LoadSDNode *L = P->Load;
- assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
+ assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
+ !L->isIndexed() &&
"Must be enforced by calculateByteProvider");
assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
@@ -6958,25 +6933,25 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
(N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
- SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
- if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
+ SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
+ if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
- LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
- RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
- AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
- return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
+ N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
+ N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
+ AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
+ return DAG.getNode(NewOpcode, DL, VT, N00, N01);
}
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
if (isAllOnesConstant(N1) && N0.hasOneUse() &&
(N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
- SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
- if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
+ SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
+ if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
- LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
- RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
- AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
- return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
+ N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
+ N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
+ AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
+ return DAG.getNode(NewOpcode, DL, VT, N00, N01);
}
}
@@ -7079,26 +7054,103 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return SDValue();
}
+/// If we have a shift-by-constant of a bitwise logic op that itself has a
+/// shift-by-constant operand with identical opcode, we may be able to convert
+/// that into 2 independent shifts followed by the logic op. This is a
+/// throughput improvement.
+static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
+ // Match a one-use bitwise logic op.
+ SDValue LogicOp = Shift->getOperand(0);
+ if (!LogicOp.hasOneUse())
+ return SDValue();
+
+ unsigned LogicOpcode = LogicOp.getOpcode();
+ if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
+ LogicOpcode != ISD::XOR)
+ return SDValue();
+
+ // Find a matching one-use shift by constant.
+ unsigned ShiftOpcode = Shift->getOpcode();
+ SDValue C1 = Shift->getOperand(1);
+ ConstantSDNode *C1Node = isConstOrConstSplat(C1);
+ assert(C1Node && "Expected a shift with constant operand");
+ const APInt &C1Val = C1Node->getAPIntValue();
+ auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
+ const APInt *&ShiftAmtVal) {
+ if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
+ return false;
+
+ ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
+ if (!ShiftCNode)
+ return false;
+
+ // Capture the shifted operand and shift amount value.
+ ShiftOp = V.getOperand(0);
+ ShiftAmtVal = &ShiftCNode->getAPIntValue();
+
+ // Shift amount types do not have to match their operand type, so check that
+ // the constants are the same width.
+ if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
+ return false;
+
+ // The fold is not valid if the sum of the shift values exceeds bitwidth.
+ if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
+ return false;
+
+ return true;
+ };
+
+ // Logic ops are commutative, so check each operand for a match.
+ SDValue X, Y;
+ const APInt *C0Val;
+ if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
+ Y = LogicOp.getOperand(1);
+ else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
+ Y = LogicOp.getOperand(0);
+ else
+ return SDValue();
+
+ // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
+ SDLoc DL(Shift);
+ EVT VT = Shift->getValueType(0);
+ EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
+ SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
+ SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
+ SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
+ return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
+}
+
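
The distribution implemented above holds because a shift by a constant selects
bits uniformly, so it commutes with bitwise AND/OR/XOR as long as the summed
shift amount stays below the bit width. A standalone check of the shl flavor
(srl and sra behave analogously):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0x12345678u, Y = 0x0F0F0F0Fu;
      unsigned C0 = 3, C1 = 5; // C0 + C1 < 32, as the fold requires
      uint32_t Old = ((X << C0) & Y) << C1;        // shift (logic (shift X, C0), Y), C1
      uint32_t New = (X << (C0 + C1)) & (Y << C1); // logic (shift X, C0+C1), (shift Y, C1)
      assert(Old == New);
    }
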
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
/// We are looking for: (shift being one of shl/sra/srl)
/// shift (binop X, C0), C1
/// And want to transform into:
/// binop (shift X, C1), (shift C0, C1)
-SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
+ assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
+
// Do not turn a 'not' into a regular xor.
if (isBitwiseNot(N->getOperand(0)))
return SDValue();
// The inner binop must be one-use, since we want to replace it.
- SDNode *LHS = N->getOperand(0).getNode();
- if (!LHS->hasOneUse()) return SDValue();
+ SDValue LHS = N->getOperand(0);
+ if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
+ return SDValue();
+
+ // TODO: This is limited to early combining because it may reveal regressions
+ // otherwise. But since we just checked a target hook to see if this is
+ // desirable, that should have filtered out cases where this interferes
+ // with some other pattern matching.
+ if (!LegalTypes)
+ if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
+ return R;
// We want to pull some binops through shifts, so that we have (and (shift))
// instead of (shift (and)), likewise for add, or, xor, etc. This sort of
// thing happens with address calculations, so it's important to canonicalize
// it.
- switch (LHS->getOpcode()) {
+ switch (LHS.getOpcode()) {
default:
return SDValue();
case ISD::OR:
@@ -7112,14 +7164,14 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
}
// We require the RHS of the binop to be a constant and not opaque as well.
- ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
+ ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
if (!BinOpCst)
return SDValue();
// FIXME: disable this unless the input to the binop is a shift by a constant
// or is copy/select. Enable this in other cases when we figure out that it
// is exactly profitable.
- SDValue BinOpLHSVal = LHS->getOperand(0);
+ SDValue BinOpLHSVal = LHS.getOperand(0);
bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
BinOpLHSVal.getOpcode() == ISD::SRA ||
BinOpLHSVal.getOpcode() == ISD::SRL) &&
@@ -7133,24 +7185,16 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
if (IsCopyOrSelect && N->hasOneUse())
return SDValue();
- EVT VT = N->getValueType(0);
-
- if (!TLI.isDesirableToCommuteWithShift(N, Level))
- return SDValue();
-
// Fold the constants, shifting the binop RHS by the shift amount.
- SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
- N->getValueType(0),
- LHS->getOperand(1), N->getOperand(1));
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
+ N->getOperand(1));
assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
- // Create the new shift.
- SDValue NewShift = DAG.getNode(N->getOpcode(),
- SDLoc(LHS->getOperand(0)),
- VT, LHS->getOperand(0), N->getOperand(1));
-
- // Create the new binop.
- return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
+ SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
+ N->getOperand(1));
+ return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
}
SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
@@ -7478,7 +7522,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
if (N1C && !N1C->isOpaque())
- if (SDValue NewSHL = visitShiftByConstant(N, N1C))
+ if (SDValue NewSHL = visitShiftByConstant(N))
return NewSHL;
return SDValue();
@@ -7597,6 +7641,37 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
}
}
+ // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
+ // sra (add (shl X, N1C), AddC), N1C -->
+ // sext (add (trunc X to (width - N1C)), AddC')
+ if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
+ N0.getOperand(0).getOpcode() == ISD::SHL &&
+ N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
+ if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
+ SDValue Shl = N0.getOperand(0);
+ // Determine what the truncate's type would be and ask the target if that
+ // is a free operation.
+ LLVMContext &Ctx = *DAG.getContext();
+ unsigned ShiftAmt = N1C->getZExtValue();
+ EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
+ if (VT.isVector())
+ TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+
+ // TODO: The simple type check probably belongs in the default hook
+ // implementation and/or target-specific overrides (because
+ // non-simple types likely require masking when legalized), but that
+ // restriction may conflict with other transforms.
+ if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) {
+ SDLoc DL(N);
+ SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
+ SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
+ trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
+ return DAG.getSExtOrTrunc(Add, DL, VT);
+ }
+ }
+ }
+
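
In the transform above, (shl X, C) clears the low C bits, so the low bits of
AddC never carry into the interesting upper portion; truncating, adding the
shifted-down constant, and sign-extending therefore reproduces the arithmetic
shift. A standalone check with C = 16 on i32 (assumes arithmetic right shift
of signed int, as on mainstream compilers):

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t X = -12345, AddC = 0x71239ABC;
      // sra (add (shl X, 16), AddC), 16
      int32_t Old = static_cast<int32_t>((static_cast<uint32_t>(X) << 16) +
                                         static_cast<uint32_t>(AddC)) >> 16;
      // sext (add (trunc X to i16), AddC >> 16)
      int16_t AddCPrime = static_cast<int16_t>(static_cast<uint32_t>(AddC) >> 16);
      int32_t New = static_cast<int16_t>(static_cast<int16_t>(X) + AddCPrime);
      assert(Old == New);
    }
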
// fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
@@ -7638,7 +7713,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
if (N1C && !N1C->isOpaque())
- if (SDValue NewSRA = visitShiftByConstant(N, N1C))
+ if (SDValue NewSRA = visitShiftByConstant(N))
return NewSRA;
return SDValue();
@@ -7819,7 +7894,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return SDValue(N, 0);
if (N1C && !N1C->isOpaque())
- if (SDValue NewSRL = visitShiftByConstant(N, N1C))
+ if (SDValue NewSRL = visitShiftByConstant(N))
return NewSRL;
// Attempt to convert a srl of a load into a narrower zero-extending load.
@@ -8100,6 +8175,43 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
}
}
+/// If a (v)select has a condition value that is a sign-bit test, try to smear
+/// the condition operand sign-bit across the value width and use it as a mask.
+static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
+ SDValue Cond = N->getOperand(0);
+ SDValue C1 = N->getOperand(1);
+ SDValue C2 = N->getOperand(2);
+ assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
+ "Expected select-of-constants");
+
+ EVT VT = N->getValueType(0);
+ if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
+ VT != Cond.getOperand(0).getValueType())
+ return SDValue();
+
+ // The inverted-condition + commuted-select variants of these patterns are
+ // canonicalized to these forms in IR.
+ SDValue X = Cond.getOperand(0);
+ SDValue CondC = Cond.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
+ isAllOnesOrAllOnesSplat(C2)) {
+ // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
+ SDLoc DL(N);
+ SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+ return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
+ }
+ if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
+ // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
+ SDLoc DL(N);
+ SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+ return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
+ }
+ return SDValue();
+}
+
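
Both patterns above rely on the arithmetic-shift smear: X >>s (width-1) is
all-zeros for non-negative X and all-ones for negative X, which can then be
OR'ed or AND'ed with the constant. A standalone check on i32 (assumes
arithmetic right shift of signed int, as on mainstream compilers):

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t C1 = 42;
      for (int32_t X : {123, 0, -123}) {
        int32_t Smear = X >> 31;                    // 0 or -1
        assert((X > -1 ? C1 : -1) == (Smear | C1)); // first pattern
        assert((X < 0 ? C1 : 0) == (Smear & C1));   // second pattern
      }
    }
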
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
SDValue Cond = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -8148,22 +8260,36 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
return Cond;
}
- // For any constants that differ by 1, we can transform the select into an
- // extend and add. Use a target hook because some targets may prefer to
- // transform in the other direction.
+ // Use a target hook because some targets may prefer to transform in the
+ // other direction.
if (TLI.convertSelectOfConstantsToMath(VT)) {
- if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
+ // For any constants that differ by 1, we can transform the select into an
+ // extend and add.
+ const APInt &C1Val = C1->getAPIntValue();
+ const APInt &C2Val = C2->getAPIntValue();
+ if (C1Val - 1 == C2Val) {
// select Cond, C1, C1-1 --> add (zext Cond), C1-1
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
}
- if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
+ if (C1Val + 1 == C2Val) {
// select Cond, C1, C1+1 --> add (sext Cond), C1+1
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
}
+
+ // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
+ if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
+ SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
+ return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
+ }
+
+ if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+ return V;
}
return SDValue();
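
The new power-of-two case above replaces a select with a shift of the
zero-extended condition; since the condition is 0 or 1, shifting it left by
log2(Pow2) yields exactly Pow2 or 0. A standalone check (not part of the
patch):

    #include <cassert>

    int main() {
      for (bool Cond : {false, true})
        assert((Cond ? 8 : 0) == (static_cast<int>(Cond) << 3)); // log2(8) == 3
    }
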
@@ -8381,23 +8507,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
return SDValue();
}
-static
-std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
- SDLoc DL(N);
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
-
- // Split the inputs.
- SDValue Lo, Hi, LL, LH, RL, RH;
- std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
- std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
-
- Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
- Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
-
- return std::make_pair(Lo, Hi);
-}
-
// This function assumes all the vselect's arguments are CONCAT_VECTOR
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
@@ -8456,7 +8565,6 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
SDValue Mask = MSC->getMask();
- SDValue Data = MSC->getValue();
SDValue Chain = MSC->getChain();
SDLoc DL(N);
@@ -8464,123 +8572,19 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return Chain;
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
- // If the MSCATTER data type requires splitting and the mask is provided by a
- // SETCC, then split both nodes and its operands before legalization. This
- // prevents the type legalizer from unrolling SETCC into scalar comparisons
- // and enables future optimizations (e.g. min/max pattern matching on X86).
- if (Mask.getOpcode() != ISD::SETCC)
- return SDValue();
-
- // Check if any splitting is required.
- if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
- TargetLowering::TypeSplitVector)
- return SDValue();
- SDValue MaskLo, MaskHi;
- std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
-
- EVT MemoryVT = MSC->getMemoryVT();
- unsigned Alignment = MSC->getOriginalAlignment();
-
- EVT LoMemVT, HiMemVT;
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
- SDValue DataLo, DataHi;
- std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
-
- SDValue Scale = MSC->getScale();
- SDValue BasePtr = MSC->getBasePtr();
- SDValue IndexLo, IndexHi;
- std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
-
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MSC->getPointerInfo(),
- MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
- Alignment, MSC->getAAInfo(), MSC->getRanges());
-
- SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
- SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
- DataLo.getValueType(), DL, OpsLo, MMO);
-
- // The order of the Scatter operation after split is well defined. The "Hi"
- // part comes after the "Lo". So these two operations should be chained one
- // after another.
- SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
- return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
- DL, OpsHi, MMO);
+ return SDValue();
}
SDValue DAGCombiner::visitMSTORE(SDNode *N) {
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
- SDValue Data = MST->getValue();
SDValue Chain = MST->getChain();
- EVT VT = Data.getValueType();
SDLoc DL(N);
// Zap masked stores with a zero mask.
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return Chain;
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
- // If the MSTORE data type requires splitting and the mask is provided by a
- // SETCC, then split both nodes and its operands before legalization. This
- // prevents the type legalizer from unrolling SETCC into scalar comparisons
- // and enables future optimizations (e.g. min/max pattern matching on X86).
- if (Mask.getOpcode() == ISD::SETCC) {
- // Check if any splitting is required.
- if (TLI.getTypeAction(*DAG.getContext(), VT) !=
- TargetLowering::TypeSplitVector)
- return SDValue();
-
- SDValue MaskLo, MaskHi, Lo, Hi;
- std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
- SDValue Ptr = MST->getBasePtr();
-
- EVT MemoryVT = MST->getMemoryVT();
- unsigned Alignment = MST->getOriginalAlignment();
-
- EVT LoMemVT, HiMemVT;
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
- SDValue DataLo, DataHi;
- std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
-
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MST->getPointerInfo(),
- MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
- Alignment, MST->getAAInfo(), MST->getRanges());
-
- Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
- MST->isTruncatingStore(),
- MST->isCompressingStore());
-
- Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
- MST->isCompressingStore());
- unsigned HiOffset = LoMemVT.getStoreSize();
-
- MMO = DAG.getMachineFunction().getMachineMemOperand(
- MST->getPointerInfo().getWithOffset(HiOffset),
- MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment,
- MST->getAAInfo(), MST->getRanges());
-
- Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
- MST->isTruncatingStore(),
- MST->isCompressingStore());
-
- AddToWorklist(Lo.getNode());
- AddToWorklist(Hi.getNode());
-
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
- }
return SDValue();
}
@@ -8593,76 +8597,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return CombineTo(N, MGT->getPassThru(), MGT->getChain());
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
- // If the MGATHER result requires splitting and the mask is provided by a
- // SETCC, then split both nodes and its operands before legalization. This
- // prevents the type legalizer from unrolling SETCC into scalar comparisons
- // and enables future optimizations (e.g. min/max pattern matching on X86).
-
- if (Mask.getOpcode() != ISD::SETCC)
- return SDValue();
-
- EVT VT = N->getValueType(0);
-
- // Check if any splitting is required.
- if (TLI.getTypeAction(*DAG.getContext(), VT) !=
- TargetLowering::TypeSplitVector)
- return SDValue();
-
- SDValue MaskLo, MaskHi, Lo, Hi;
- std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
- SDValue PassThru = MGT->getPassThru();
- SDValue PassThruLo, PassThruHi;
- std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
-
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
-
- SDValue Chain = MGT->getChain();
- EVT MemoryVT = MGT->getMemoryVT();
- unsigned Alignment = MGT->getOriginalAlignment();
-
- EVT LoMemVT, HiMemVT;
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
- SDValue Scale = MGT->getScale();
- SDValue BasePtr = MGT->getBasePtr();
- SDValue Index = MGT->getIndex();
- SDValue IndexLo, IndexHi;
- std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
-
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MGT->getPointerInfo(),
- MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
- Alignment, MGT->getAAInfo(), MGT->getRanges());
-
- SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
- Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
- MMO);
-
- SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
- Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
- MMO);
-
- AddToWorklist(Lo.getNode());
- AddToWorklist(Hi.getNode());
-
- // Build a factor node to remember that this load is independent of the
- // other one.
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
-
- // Legalized the chain result - switch anything that used the old chain to
- // use the new one.
- DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
-
- SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
-
- SDValue RetOps[] = { GatherRes, Chain };
- return DAG.getMergeValues(RetOps, DL);
+ return SDValue();
}
SDValue DAGCombiner::visitMLOAD(SDNode *N) {
@@ -8674,76 +8609,6 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return CombineTo(N, MLD->getPassThru(), MLD->getChain());
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
- // If the MLOAD result requires splitting and the mask is provided by a
- // SETCC, then split both nodes and its operands before legalization. This
- // prevents the type legalizer from unrolling SETCC into scalar comparisons
- // and enables future optimizations (e.g. min/max pattern matching on X86).
- if (Mask.getOpcode() == ISD::SETCC) {
- EVT VT = N->getValueType(0);
-
- // Check if any splitting is required.
- if (TLI.getTypeAction(*DAG.getContext(), VT) !=
- TargetLowering::TypeSplitVector)
- return SDValue();
-
- SDValue MaskLo, MaskHi, Lo, Hi;
- std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
- SDValue PassThru = MLD->getPassThru();
- SDValue PassThruLo, PassThruHi;
- std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
-
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
-
- SDValue Chain = MLD->getChain();
- SDValue Ptr = MLD->getBasePtr();
- EVT MemoryVT = MLD->getMemoryVT();
- unsigned Alignment = MLD->getOriginalAlignment();
-
- EVT LoMemVT, HiMemVT;
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MLD->getPointerInfo(),
- MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
- Alignment, MLD->getAAInfo(), MLD->getRanges());
-
- Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
- MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
-
- Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
- MLD->isExpandingLoad());
- unsigned HiOffset = LoMemVT.getStoreSize();
-
- MMO = DAG.getMachineFunction().getMachineMemOperand(
- MLD->getPointerInfo().getWithOffset(HiOffset),
- MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment,
- MLD->getAAInfo(), MLD->getRanges());
-
- Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
- MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
-
- AddToWorklist(Lo.getNode());
- AddToWorklist(Hi.getNode());
-
- // Build a factor node to remember that this load is independent of the
- // other one.
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
-
- // Legalized the chain result - switch anything that used the old chain to
- // use the new one.
- DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
-
- SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
-
- SDValue RetOps[] = { LoadRes, Chain };
- return DAG.getMergeValues(RetOps, DL);
- }
return SDValue();
}
@@ -8791,6 +8656,18 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
}
+ // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
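+ // e.g. vselect <4 x i1> %c, <4 x i32> <8,8,8,8>, zeroinitializer
+ // --> shl (zext %c to v4i32), <3,3,3,3>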
+ APInt Pow2C;
+ if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
+ isNullOrNullSplat(N2)) {
+ SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
+ SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
+ return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
+ }
+
+ if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+ return V;
+
// The general case for select-of-constants:
// vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
// ...but that only makes sense if a vselect is slower than 2 logic ops, so
@@ -8832,13 +8709,12 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
if (isAbs) {
- EVT VT = LHS.getValueType();
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
return DAG.getNode(ISD::ABS, DL, VT, LHS);
- SDValue Shift = DAG.getNode(
- ISD::SRA, DL, VT, LHS,
- DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
+ SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1,
+ DL, getShiftAmountTy(VT)));
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
AddToWorklist(Shift.getNode());
AddToWorklist(Add.getNode());
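// The shift and add computed here feed the standard expansion
// abs(x) = (x + s) ^ s, with s = x >> (BW-1) produced by the sra above.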
@@ -8851,10 +8727,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
// This is OK if we don't care about what happens if either operand is a
// NaN.
//
- if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0),
- N0.getOperand(1), TLI)) {
- if (SDValue FMinMax = combineMinNumMaxNum(
- DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
+ if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
+ if (SDValue FMinMax =
+ combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
return FMinMax;
}
@@ -9209,8 +9084,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
- !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
- !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
+ !N0.hasOneUse() || !LN0->isSimple() ||
+ !DstVT.isVector() || !DstVT.isPow2VectorType() ||
+ !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
return SDValue();
SmallVector<SDNode *, 4> SetCCs;
@@ -9411,7 +9287,8 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
- if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
+ if ((LegalOperations || !LN0->isSimple() ||
+ VT.isVector()) &&
!TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
return SDValue();
@@ -9436,7 +9313,7 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
if (!ISD::isNON_EXTLoad(N0.getNode()) ||
!ISD::isUNINDEXEDLoad(N0.getNode()) ||
((LegalOperations || VT.isVector() ||
- cast<LoadSDNode>(N0)->isVolatile()) &&
+ !cast<LoadSDNode>(N0)->isSimple()) &&
!TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
return {};
@@ -9468,6 +9345,35 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
+static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
+ const TargetLowering &TLI, EVT VT,
+ SDNode *N, SDValue N0,
+ ISD::LoadExtType ExtLoadType,
+ ISD::NodeType ExtOpc) {
+ if (!N0.hasOneUse())
+ return SDValue();
+
+ MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
+ if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
+ return SDValue();
+
+ if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
+ return SDValue();
+
+ if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
+ return SDValue();
+
+ SDLoc dl(Ld);
+ SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
+ SDValue NewLoad = DAG.getMaskedLoad(VT, dl, Ld->getChain(),
+ Ld->getBasePtr(), Ld->getMask(),
+ PassThru, Ld->getMemoryVT(),
+ Ld->getMemOperand(), ExtLoadType,
+ Ld->isExpandingLoad());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
+ return NewLoad;
+}
+
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
bool LegalOperations) {
assert((N->getOpcode() == ISD::SIGN_EXTEND ||
@@ -9568,6 +9474,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
ISD::SEXTLOAD, ISD::SIGN_EXTEND))
return foldedExt;
+ if (SDValue foldedExt =
+ tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
+ ISD::SIGN_EXTEND))
+ return foldedExt;
+
// fold (sext (load x)) to multiple smaller sextloads.
// Only on illegal but splittable vectors.
if (SDValue ExtLoad = CombineExtLoad(N))
@@ -9856,6 +9767,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
return foldedExt;
+ if (SDValue foldedExt =
+ tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
+ ISD::ZERO_EXTEND))
+ return foldedExt;
+
// fold (zext (load x)) to multiple smaller zextloads.
// Only on illegal but splittable vectors.
if (SDValue ExtLoad = CombineExtLoad(N))
@@ -10340,7 +10256,10 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
return SDValue();
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
+ // Reducing the width of a volatile load is illegal. For atomics, we may be
+ // able to reduce the width provided we never widen again. (see D66309)
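+ // (A load is "simple" when it is neither volatile nor atomic.)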
+ if (!LN0->isSimple() ||
+ !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
return SDValue();
auto AdjustBigEndianShift = [&](unsigned ShAmt) {
@@ -10369,11 +10288,11 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
SDValue Load;
if (ExtType == ISD::NON_EXTLOAD)
- Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
+ Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
else
- Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
+ Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
NewAlign, LN0->getMemOperand()->getFlags(),
LN0->getAAInfo());
@@ -10392,7 +10311,6 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
// no larger than the source) then the useful bits of the result are
// zero; we can't simply return the shortened shift, because the result
// of that operation is undefined.
- SDLoc DL(N0);
if (ShLeftAmt >= VT.getSizeInBits())
Result = DAG.getConstant(0, DL, VT);
else
@@ -10513,7 +10431,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
if (ISD::isEXTLoad(N0.getNode()) &&
ISD::isUNINDEXEDLoad(N0.getNode()) &&
EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
+ ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
N0.hasOneUse()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -10530,7 +10448,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
N0.hasOneUse() &&
EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
@@ -10757,7 +10675,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// after truncation.
if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- if (!LN0->isVolatile() &&
+ if (LN0->isSimple() &&
LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
VT, LN0->getChain(), LN0->getBasePtr(),
@@ -11051,7 +10969,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// memory accesses. We don't care if the original type was legal or not
// as we assume software couldn't rely on the number of accesses of an
// illegal type.
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
TLI.isOperationLegal(ISD::LOAD, VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -11237,15 +11155,10 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
for (int i = 0; i != MaskScale; ++i)
NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
- bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
- if (!LegalMask) {
- std::swap(SV0, SV1);
- ShuffleVectorSDNode::commuteMask(NewMask);
- LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
- }
-
- if (LegalMask)
- return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
+ SDValue LegalShuffle =
+ TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
+ if (LegalShuffle)
+ return LegalShuffle;
}
return SDValue();
@@ -11998,7 +11911,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
if (N1C && N1C->isZero())
- if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
+ if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -12006,17 +11919,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2)
- return DAG.getNode(ISD::FSUB, DL, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations,
- ForCodeSize), Flags);
+ TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize) == 2)
+ return DAG.getNode(
+ ISD::FSUB, DL, VT, N0,
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
// fold (fadd (fneg A), B) -> (fsub B, A)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2)
- return DAG.getNode(ISD::FSUB, DL, VT, N1,
- GetNegatedExpression(N0, DAG, LegalOperations,
- ForCodeSize), Flags);
+ TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize) == 2)
+ return DAG.getNode(
+ ISD::FSUB, DL, VT, N1,
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags);
auto isFMulNegTwo = [](SDValue FMul) {
if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
@@ -12056,7 +11969,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// If 'unsafe math' or reassoc and nsz, fold lots of things.
// TODO: break out portions of the transformations below for which Unsafe is
// considered and which do not require both nsz and reassoc
- if ((Options.UnsafeFPMath ||
+ if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
AllowNewConst) {
// fadd (fadd x, c1), c2 -> fadd x, c1 + c2
@@ -12175,7 +12088,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// (fsub A, 0) -> A
if (N1CFP && N1CFP->isZero()) {
- if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
+ if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
Flags.hasNoSignedZeros()) {
return N0;
}
@@ -12195,16 +12108,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (N0CFP && N0CFP->isZero()) {
if (N0CFP->isNegative() ||
(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
- if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
- return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+ if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize))
+ return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
}
}
- if ((Options.UnsafeFPMath ||
- (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
- && N1.getOpcode() == ISD::FADD) {
+ if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+ (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
+ N1.getOpcode() == ISD::FADD) {
// X - (X + Y) -> -Y
if (N0 == N1->getOperand(0))
return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
@@ -12214,10 +12127,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
- if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
- return DAG.getNode(ISD::FADD, DL, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations,
- ForCodeSize), Flags);
+ if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize))
+ return DAG.getNode(
+ ISD::FADD, DL, VT, N0,
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
// FSUB -> FMA combines:
if (SDValue Fused = visitFSUBForFMACombine(N)) {
@@ -12228,6 +12141,21 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
return SDValue();
}
+/// Return true if both inputs are at least as cheap in negated form and at
+/// least one input is strictly cheaper in negated form.
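+/// (TLI.isNegatibleForFree returns 2 when negation is strictly cheaper, e.g.
+/// when an existing fneg disappears, and 1 when it is merely free.)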
+bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) {
+ if (char LHSNeg =
+ TLI.isNegatibleForFree(X, DAG, LegalOperations, ForCodeSize))
+ if (char RHSNeg =
+ TLI.isNegatibleForFree(Y, DAG, LegalOperations, ForCodeSize))
+ // Both negated operands are at least as cheap as their counterparts.
+ // Check to see if at least one is cheaper negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return true;
+
+ return false;
+}
+
SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -12254,10 +12182,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
!isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
- // fold (fmul A, 1.0) -> A
- if (N1CFP && N1CFP->isExactlyValue(1.0))
- return N0;
-
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -12302,21 +12226,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, DL, VT, N0);
- // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
- ForCodeSize)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
- ForCodeSize)) {
- // Both can be negated for free, check to see if at least one is cheaper
- // negated.
- if (LHSNeg == 2 || RHSNeg == 2)
- return DAG.getNode(ISD::FMUL, DL, VT,
- GetNegatedExpression(N0, DAG, LegalOperations,
- ForCodeSize),
- GetNegatedExpression(N1, DAG, LegalOperations,
- ForCodeSize),
- Flags);
- }
+ // -N0 * -N1 --> N0 * N1
+ if (isCheaperToUseNegatedFPOps(N0, N1)) {
+ SDValue NegN0 =
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+ return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
}
// fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
@@ -12395,6 +12311,15 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
}
+ // (-N0 * -N1) + N2 --> (N0 * N1) + N2
+ if (isCheaperToUseNegatedFPOps(N0, N1)) {
+ SDValue NegN0 =
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+ return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
+ }
+
if (UnsafeFPMath) {
if (N0CFP && N0CFP->isZero())
return N2;
@@ -12602,9 +12527,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// If this FDIV is part of a reciprocal square root, it may be folded
// into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) {
- if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
- }
} else if (N1.getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
@@ -12645,28 +12569,16 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
}
// Fold into a reciprocal estimate and multiply instead of a real divide.
- if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
- AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
- }
+ if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
+ return RV;
}
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
- ForCodeSize)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
- ForCodeSize)) {
- // Both can be negated for free, check to see if at least one is cheaper
- // negated.
- if (LHSNeg == 2 || RHSNeg == 2)
- return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
- GetNegatedExpression(N0, DAG, LegalOperations,
- ForCodeSize),
- GetNegatedExpression(N1, DAG, LegalOperations,
- ForCodeSize),
- Flags);
- }
- }
+ if (isCheaperToUseNegatedFPOps(N0, N1))
+ return DAG.getNode(
+ ISD::FDIV, SDLoc(N), VT,
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize),
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
return SDValue();
}
@@ -13112,22 +13024,6 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
- SDValue N0 = N->getOperand(0);
- EVT VT = N->getValueType(0);
- EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
-
- // fold (fp_round_inreg c1fp) -> c1fp
- if (N0CFP && isTypeLegal(EVT)) {
- SDLoc DL(N);
- SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
- return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
- }
-
- return SDValue();
-}
-
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -13236,9 +13132,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
- if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
- &DAG.getTarget().Options, ForCodeSize))
- return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+ if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize))
+ return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
// Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
// constant pool values.
@@ -14004,11 +13899,12 @@ bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
}
SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
- if (OptLevel == CodeGenOpt::None || LD->isVolatile())
+ if (OptLevel == CodeGenOpt::None || !LD->isSimple())
return SDValue();
SDValue Chain = LD->getOperand(0);
StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
- if (!ST || ST->isVolatile())
+ // TODO: Relax this restriction for unordered atomics (see D66309)
+ if (!ST || !ST->isSimple())
return SDValue();
EVT LDType = LD->getValueType(0);
@@ -14107,7 +14003,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// If load is not volatile and there are no uses of the loaded value (and
// the updated indexed value in case of indexed loads), change uses of the
// chain value into uses of the chain input (i.e. delete the dead load).
- if (!LD->isVolatile()) {
+ // TODO: Allow this for unordered atomics (see D66309)
+ if (LD->isSimple()) {
if (N->getValueType(1) == MVT::Other) {
// Unindexed loads.
if (!N->hasAnyUseOfValue(0)) {
@@ -14241,7 +14138,7 @@ struct LoadedSlice {
/// Helper structure used to compute the cost of a slice.
struct Cost {
/// Are we optimizing for code size.
- bool ForCodeSize;
+ bool ForCodeSize = false;
/// Various cost.
unsigned Loads = 0;
@@ -14250,10 +14147,10 @@ struct LoadedSlice {
unsigned ZExts = 0;
unsigned Shift = 0;
- Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
+ explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
/// Get the cost of one isolated slice.
- Cost(const LoadedSlice &LS, bool ForCodeSize = false)
+ Cost(const LoadedSlice &LS, bool ForCodeSize)
: ForCodeSize(ForCodeSize), Loads(1) {
EVT TruncType = LS.Inst->getValueType(0);
EVT LoadedType = LS.getLoadedType();
@@ -14678,7 +14575,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
return false;
LoadSDNode *LD = cast<LoadSDNode>(N);
- if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
+ if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
!LD->getValueType(0).isInteger())
return false;
@@ -14829,13 +14726,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
else if (Chain->getOpcode() == ISD::TokenFactor &&
SDValue(LD, 1).hasOneUse()) {
// LD has only 1 chain use, so there are no indirect dependencies.
- bool isOk = false;
- for (const SDValue &ChainOp : Chain->op_values())
- if (ChainOp.getNode() == LD) {
- isOk = true;
- break;
- }
- if (!isOk)
+ if (!LD->isOperandOf(Chain.getNode()))
return Result;
} else
return Result; // Fail.
@@ -14848,7 +14739,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
-static SDNode *
+static SDValue
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
SDValue IVal, StoreSDNode *St,
DAGCombiner *DC) {
@@ -14860,14 +14751,19 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
// that uses this. If not, this is not a replacement.
APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
ByteShift*8, (ByteShift+NumBytes)*8);
- if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
+ if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
// Check that it is legal on the target to do this. It is legal if the new
// VT we're shrinking to (i8/i16/i32) is legal or we're still before type
- // legalization.
- MVT VT = MVT::getIntegerVT(NumBytes*8);
+ // legalization (and the target doesn't explicitly think this is a bad idea).
+ MVT VT = MVT::getIntegerVT(NumBytes * 8);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!DC->isTypeLegal(VT))
- return nullptr;
+ return SDValue();
+ if (St->getMemOperand() &&
+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ *St->getMemOperand()))
+ return SDValue();
// Okay, we can do this! Replace the 'St' store with a store of IVal that is
// shifted by ByteShift and truncated down to NumBytes.
@@ -14901,8 +14797,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
++OpsNarrowed;
return DAG
.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
- St->getPointerInfo().getWithOffset(StOffset), NewAlign)
- .getNode();
+ St->getPointerInfo().getWithOffset(StOffset), NewAlign);
}
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
@@ -14911,7 +14806,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
- if (ST->isVolatile())
+ if (!ST->isSimple())
return SDValue();
SDValue Chain = ST->getChain();
@@ -14933,16 +14828,16 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
std::pair<unsigned, unsigned> MaskedLoad;
MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
if (MaskedLoad.first)
- if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
Value.getOperand(1), ST, this))
- return SDValue(NewST, 0);
+ return NewST;
// Or is commutative, so try swapping X and Y.
MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
if (MaskedLoad.first)
- if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
Value.getOperand(0), ST, this))
- return SDValue(NewST, 0);
+ return NewST;
}
if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
@@ -15367,14 +15262,16 @@ void DAGCombiner::getStoreMergeCandidates(
// Loads must only have one use.
if (!Ld->hasNUsesOfValue(1, 0))
return;
- // The memory operands must not be volatile/indexed.
- if (Ld->isVolatile() || Ld->isIndexed())
+ // The memory operands must not be volatile/indexed/atomic.
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (!Ld->isSimple() || Ld->isIndexed())
return;
}
auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
int64_t &Offset) -> bool {
- // The memory operands must not be volatile/indexed.
- if (Other->isVolatile() || Other->isIndexed())
+ // The memory operands must not be volatile/indexed/atomic.
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (!Other->isSimple() || Other->isIndexed())
return false;
// Don't mix temporal stores with non-temporal stores.
if (St->isNonTemporal() != Other->isNonTemporal())
@@ -15394,8 +15291,10 @@ void DAGCombiner::getStoreMergeCandidates(
// Loads must only have one use.
if (!OtherLd->hasNUsesOfValue(1, 0))
return false;
- // The memory operands must not be volatile/indexed.
- if (OtherLd->isVolatile() || OtherLd->isIndexed())
+ // The memory operands must not be volatile/indexed/atomic.
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (!OtherLd->isSimple() ||
+ OtherLd->isIndexed())
return false;
// Don't mix temporal loads with non-temporal loads.
if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
@@ -15425,6 +15324,18 @@ void DAGCombiner::getStoreMergeCandidates(
return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
};
+ // Check if the given StoreNode and RootNode pair has already bailed out of
+ // the dependence check more times than the limit allows.
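+ // (The count lives in StoreRootCountMap and is bumped by
+ // checkMergeStoreCandidatesForDependencies each time its DAG search gives
+ // up for this pair.)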
+ auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
+ SDNode *RootNode) -> bool {
+ auto RootCount = StoreRootCountMap.find(StoreNode);
+ if (RootCount != StoreRootCountMap.end() &&
+ RootCount->second.first == RootNode &&
+ RootCount->second.second > StoreMergeDependenceLimit)
+ return true;
+ return false;
+ };
+
// We are looking for a root node which is an ancestor to all mergeable
// stores. We search up through a load, to our root and then down
// through all children. For instance we will find Store{1,2,3} if
@@ -15454,7 +15365,8 @@ void DAGCombiner::getStoreMergeCandidates(
if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
BaseIndexOffset Ptr;
int64_t PtrDiff;
- if (CandidateMatch(OtherST, Ptr, PtrDiff))
+ if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
+ !OverLimitInDependenceCheck(OtherST, RootNode))
StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
}
} else
@@ -15464,7 +15376,8 @@ void DAGCombiner::getStoreMergeCandidates(
if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
BaseIndexOffset Ptr;
int64_t PtrDiff;
- if (CandidateMatch(OtherST, Ptr, PtrDiff))
+ if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
+ !OverLimitInDependenceCheck(OtherST, RootNode))
StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
}
}
@@ -15522,13 +15435,24 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
// Search through DAG. We can stop early if we find a store node.
for (unsigned i = 0; i < NumStores; ++i)
if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
- Max))
+ Max)) {
+ // If the search bails out, record the StoreNode and RootNode in the
+ // StoreRootCountMap. Once we have seen the pair more times than the
+ // limit, we stop adding the StoreNode to the StoreNodes set.
+ if (Visited.size() >= Max) {
+ auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
+ if (RootCount.first == RootNode)
+ RootCount.second++;
+ else
+ RootCount = {RootNode, 1};
+ }
return false;
+ }
return true;
}
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
- if (OptLevel == CodeGenOpt::None)
+ if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
return false;
EVT MemVT = St->getMemoryVT();
@@ -15588,7 +15512,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
bool RV = false;
while (StoreNodes.size() > 1) {
- unsigned StartIdx = 0;
+ size_t StartIdx = 0;
while ((StartIdx + 1 < StoreNodes.size()) &&
StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
StoreNodes[StartIdx + 1].OffsetFromBase)
@@ -16113,7 +16037,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
case MVT::ppcf128:
return SDValue();
case MVT::f32:
- if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
+ if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
@@ -16125,7 +16049,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
return SDValue();
case MVT::f64:
if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
- !ST->isVolatile()) ||
+ ST->isSimple()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
@@ -16134,7 +16058,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
Ptr, ST->getMemOperand());
}
- if (!ST->isVolatile() &&
+ if (ST->isSimple() &&
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
// Many FP stores are not made apparent until after legalize, e.g. for
// argument passing. Since this is so common, custom legalize the
@@ -16181,7 +16105,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// memory accesses. We don't care if the original type was legal or not
// as we assume software couldn't rely on the number of accesses of an
// illegal type.
- if (((!LegalOperations && !ST->isVolatile()) ||
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (((!LegalOperations && ST->isSimple()) ||
TLI.isOperationLegal(ISD::STORE, SVT)) &&
TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
DAG, *ST->getMemOperand())) {
@@ -16242,9 +16167,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// See if we can simplify the input to this truncstore with knowledge that
// only the low bits are being used. For example:
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
- SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits);
AddToWorklist(Value.getNode());
- if (Shorter)
+ if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
ST->getMemOperand());
@@ -16263,9 +16187,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// If this is a load followed by a store to the same location, then the store
// is dead/noop.
+ // TODO: Can relax for unordered atomics (see D66309)
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
- ST->isUnindexed() && !ST->isVolatile() &&
+ ST->isUnindexed() && ST->isSimple() &&
// There can't be any side effects between the load and store, such as
// a call or store.
Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
@@ -16274,9 +16199,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
}
+ // TODO: Can relax for unordered atomics (see D66309)
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
- if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
- !ST1->isVolatile()) {
+ if (ST->isUnindexed() && ST->isSimple() &&
+ ST1->isUnindexed() && ST1->isSimple()) {
if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
ST->getMemoryVT() == ST1->getMemoryVT()) {
// If this is a store followed by a store with the same value to the
@@ -16405,7 +16331,8 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
break;
case ISD::STORE: {
StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
- if (ST->isVolatile() || ST->isIndexed())
+ // TODO: Can relax for unordered atomics (see D66309)
+ if (!ST->isSimple() || ST->isIndexed())
continue;
const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
// If we store purely within object bounds just before its lifetime ends,
@@ -16456,6 +16383,11 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
if (OptLevel == CodeGenOpt::None)
return SDValue();
+ // Can't change the number of memory accesses for a volatile store or break
+ // atomicity for an atomic one.
+ if (!ST->isSimple())
+ return SDValue();
+
SDValue Val = ST->getValue();
SDLoc DL(ST);
@@ -16531,12 +16463,52 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
}
/// Convert a disguised subvector insertion into a shuffle:
-/// insert_vector_elt V, (bitcast X from vector type), IdxC -->
-/// bitcast(shuffle (bitcast V), (extended X), Mask)
-/// Note: We do not use an insert_subvector node because that requires a legal
-/// subvector type.
SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
SDValue InsertVal = N->getOperand(1);
+ SDValue Vec = N->getOperand(0);
+
+ // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex)
+ // --> (vector_shuffle X, Y)
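+ // e.g. inserting (extract_vector_elt X, 2) at lane 0 of a shuffle of two
+ // v4i32 vectors with mask <4,5,6,7> just rewrites the mask to <2,5,6,7>.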
+ if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
+ InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(InsertVal.getOperand(1))) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
+ ArrayRef<int> Mask = SVN->getMask();
+
+ SDValue X = Vec.getOperand(0);
+ SDValue Y = Vec.getOperand(1);
+
+ // Vec's operand 0 is using indices from 0 to N-1 and
+ // operand 1 from N to 2N - 1, where N is the number of
+ // elements in the vectors.
+ int XOffset = -1;
+ if (InsertVal.getOperand(0) == X) {
+ XOffset = 0;
+ } else if (InsertVal.getOperand(0) == Y) {
+ XOffset = X.getValueType().getVectorNumElements();
+ }
+
+ if (XOffset != -1) {
+ SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
+
+ auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
+ NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue();
+ assert(NewMask[InsIndex] <
+ (int)(2 * Vec.getValueType().getVectorNumElements()) &&
+ NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
+
+ SDValue LegalShuffle =
+ TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
+ Y, NewMask, DAG);
+ if (LegalShuffle)
+ return LegalShuffle;
+ }
+ }
+
+ // insert_vector_elt V, (bitcast X from vector type), IdxC -->
+ // bitcast(shuffle (bitcast V), (extended X), Mask)
+ // Note: We do not use an insert_subvector node because that requires a
+ // legal subvector type.
if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
!InsertVal.getOperand(0).getValueType().isVector())
return SDValue();
@@ -16674,7 +16646,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
SDValue EltNo,
LoadSDNode *OriginalLoad) {
- assert(!OriginalLoad->isVolatile());
+ assert(OriginalLoad->isSimple());
EVT ResultVT = EVE->getValueType(0);
EVT VecEltVT = InVecVT.getVectorElementType();
@@ -16747,12 +16719,12 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
SDValue To[] = { Load, Chain };
DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+ // Make sure to revisit this node to clean it up; it will usually be dead.
+ AddToWorklist(EVE);
// Since we're explicitly calling ReplaceAllUses, add the new node to the
// worklist explicitly as well.
- AddToWorklist(Load.getNode());
AddUsersToWorklist(Load.getNode()); // Add users too
- // Make sure to revisit this node to clean it up; it will usually be dead.
- AddToWorklist(EVE);
+ AddToWorklist(Load.getNode());
++OpsNarrowed;
return SDValue(EVE, 0);
}
@@ -16982,7 +16954,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
ISD::isNormalLoad(VecOp.getNode()) &&
!Index->hasPredecessor(VecOp.getNode())) {
auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
- if (VecLoad && !VecLoad->isVolatile())
+ if (VecLoad && VecLoad->isSimple())
return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
}
@@ -17041,7 +17013,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// Make sure we found a simple (non-volatile, non-atomic) load and that the
// extractelement is its only use.
- if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
+ if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
return SDValue();
// If Idx was -1 above, Elt is going to be -1, so just return undef.
@@ -17344,17 +17316,16 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
// the shuffle mask with -1.
}
- // Turn this into a shuffle with zero if that's legal.
- EVT VecVT = Extract.getOperand(0).getValueType();
- if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT))
- return SDValue();
-
// buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
// bitcast (shuffle V, ZeroVec, VectorMask)
SDLoc DL(BV);
+ EVT VecVT = Extract.getOperand(0).getValueType();
SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
- SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
- ShufMask);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
+ ZeroVec, ShufMask, DAG);
+ if (!Shuf)
+ return SDValue();
return DAG.getBitcast(VT, Shuf);
}
@@ -17656,6 +17627,13 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
}
}
+ // A splat of a single element is a SPLAT_VECTOR if supported on the target.
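+ // e.g. (build_vector x, x, x, x) --> (splat_vector x) on targets, such as
+ // those with scalable vectors, that handle SPLAT_VECTOR natively.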
+ if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
+ if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
+ assert(!V.isUndef() && "Splat of undef should have been handled earlier");
+ return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
+ }
+
// Check if we can express BUILD VECTOR via subvector extract.
if (!LegalTypes && (N->getNumOperands() > 1)) {
SDValue Op0 = N->getOperand(0);
@@ -17829,11 +17807,9 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
}
}
- if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
- return SDValue();
-
- return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
- DAG.getBitcast(VT, SV1), Mask);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
+ DAG.getBitcast(VT, SV1), Mask, DAG);
}
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
@@ -17853,6 +17829,15 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
SDValue In = N->getOperand(0);
assert(In.getValueType().isVector() && "Must concat vectors");
+ // If the input is a concat_vectors, just make a larger concat by padding
+ // with smaller undefs.
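+ // e.g. concat_vectors (concat_vectors A, B), undef
+ // --> concat_vectors A, B, undef, undef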
+ if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
+ unsigned NumOps = N->getNumOperands() * In.getNumOperands();
+ SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
+ Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
+ }
+
SDValue Scalar = peekThroughOneUseBitcasts(In);
// concat_vectors(scalar_to_vector(scalar), undef) ->
@@ -18002,6 +17987,23 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return SDValue();
}
+// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to determine
+// whether the subvector can be sourced for free.
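+// e.g. with SubVT == v4i32 and Index == 4, (concat_vectors A, B) of two
+// v4i32 halves yields B, and (insert_subvector V, S, 4) with v4i32 S yields S.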
+static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
+ if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
+ return V.getOperand(1);
+ }
+ auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+ if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
+ V.getOperand(0).getValueType() == SubVT &&
+ (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) {
+ uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements();
+ return V.getOperand(SubIdx);
+ }
+ return SDValue();
+}
+
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -18010,39 +18012,29 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
return SDValue();
+ EVT VecVT = BinOp.getValueType();
SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
- SDValue Index = Extract->getOperand(1);
- EVT VT = Extract->getValueType(0);
+ if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
+ return SDValue();
- // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
- // if the source subvector is the same type as the one being extracted.
- auto GetSubVector = [VT, Index](SDValue V) -> SDValue {
- if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
- V.getOperand(1).getValueType() == VT && V.getOperand(2) == Index) {
- return V.getOperand(1);
- }
- auto *IndexC = dyn_cast<ConstantSDNode>(Index);
- if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
- V.getOperand(0).getValueType() == VT &&
- (IndexC->getZExtValue() % VT.getVectorNumElements()) == 0) {
- uint64_t SubIdx = IndexC->getZExtValue() / VT.getVectorNumElements();
- return V.getOperand(SubIdx);
- }
+ SDValue Index = Extract->getOperand(1);
+ EVT SubVT = Extract->getValueType(0);
+ if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT))
return SDValue();
- };
- SDValue Sub0 = GetSubVector(Bop0);
- SDValue Sub1 = GetSubVector(Bop1);
+
+ SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
+ SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
// TODO: We could handle the case where only 1 operand is being inserted by
// creating an extract of the other operand, but that requires checking
// number of uses and/or costs.
- if (!Sub0 || !Sub1 || !TLI.isOperationLegalOrCustom(BinOpcode, VT))
+ if (!Sub0 || !Sub1)
return SDValue();
// We are inserting both operands of the wide binop only to extract back
// to the narrow vector size. Eliminate all of the insert/extract:
// ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
- return DAG.getNode(BinOpcode, SDLoc(Extract), VT, Sub0, Sub1,
+ return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
BinOp->getFlags());
}
@@ -18174,7 +18166,8 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
- if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx)
+ if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
+ !ExtIdx)
return SDValue();
// Allow targets to opt-out.
@@ -18878,7 +18871,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// build_vector.
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
int SplatIndex = SVN->getSplatIndex();
- if (TLI.isExtractVecEltCheap(VT, SplatIndex) &&
+ if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
// splat (vector_bo L, R), Index -->
// splat (scalar_bo (extelt L, Index), (extelt R, Index))
@@ -19153,22 +19146,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
SV1 = DAG.getUNDEF(VT);
// Avoid introducing shuffles with illegal mask.
- if (!TLI.isShuffleMaskLegal(Mask, VT)) {
- ShuffleVectorSDNode::commuteMask(Mask);
-
- if (!TLI.isShuffleMaskLegal(Mask, VT))
- return SDValue();
-
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
- std::swap(SV0, SV1);
- }
-
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
- return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
+ return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
}
if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
@@ -19191,35 +19175,35 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
int Elt = C0->getZExtValue();
NewMask[0] = Elt;
- SDValue Val;
// If we have an implicit truncate, do the truncate here as long as it's
// legal; if it's not legal, the combine is skipped.
if (VT.getScalarType() != InVal.getValueType() &&
InVal.getValueType().isScalarInteger() &&
isTypeLegal(VT.getScalarType())) {
- Val =
+ SDValue Val =
DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
}
if (VT.getScalarType() == InVecT.getScalarType() &&
- VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
- TLI.isShuffleMaskLegal(NewMask, VT)) {
- Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
- DAG.getUNDEF(InVecT), NewMask);
- // If the initial vector is the correct size this shuffle is a
- // valid result.
- if (VT == InVecT)
- return Val;
- // If not we must truncate the vector.
- if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
- MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
- SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
- EVT SubVT =
- EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
- VT.getVectorNumElements());
- Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
- ZeroIdx);
- return Val;
+ VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
+ SDValue LegalShuffle =
+ TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
+ DAG.getUNDEF(InVecT), NewMask, DAG);
+ if (LegalShuffle) {
+ // If the initial vector is the correct size this shuffle is a
+ // valid result.
+ if (VT == InVecT)
+ return LegalShuffle;
+ // If not we must truncate the vector.
+ if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
+ EVT SubVT =
+ EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
+ VT.getVectorNumElements());
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
+ LegalShuffle, ZeroIdx);
+ }
}
}
}
@@ -19627,6 +19611,39 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
}
}
+ // Make sure all but the first op are undef or constant.
+ auto ConcatWithConstantOrUndef = [](SDValue Concat) {
+ return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
+ std::all_of(std::next(Concat->op_begin()), Concat->op_end(),
+ [](const SDValue &Op) {
+ return Op.isUndef() ||
+ ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
+ });
+ };
+
+ // The following pattern is likely to emerge with vector reduction ops. Moving
+ // the binary operation ahead of the concat may allow using a narrower vector
+ // instruction that has better performance than the wide version of the op:
+ // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
+ // concat (VBinOp X, Y), VecC
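+ // e.g. v8i32 (add (concat X, undef), (concat Y, undef))
+ // --> concat_vectors (v4i32 (add X, Y)), undef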
+ if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
+ (LHS.hasOneUse() || RHS.hasOneUse())) {
+ EVT NarrowVT = LHS.getOperand(0).getValueType();
+ if (NarrowVT == RHS.getOperand(0).getValueType() &&
+ TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
+ SDLoc DL(N);
+ unsigned NumOperands = LHS.getNumOperands();
+ SmallVector<SDValue, 4> ConcatOps;
+ for (unsigned i = 0; i != NumOperands; ++i) {
+ // This constant folds for operands 1 and up, which are undef or constant.
+ ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
+ RHS.getOperand(i)));
+ }
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
+ }
+ }
+
if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
return V;
@@ -19723,7 +19740,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
// Token chains must be identical.
if (LHS.getOperand(0) != RHS.getOperand(0) ||
// Do not let this transformation reduce the number of volatile loads.
- LLD->isVolatile() || RLD->isVolatile() ||
+ // Be conservative for atomics for the moment
+ // TODO: This does appear to be legal for unordered atomics (see D66309)
+ !LLD->isSimple() || !RLD->isSimple() ||
// FIXME: If either is a pre/post inc/dec load,
// we'd need to split out the address adjustment.
LLD->isIndexed() || RLD->isIndexed() ||
@@ -19928,7 +19947,7 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
ISD::CondCode CC) {
- if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint()))
+ if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
return SDValue();
// If we are before legalize types, we want the other legalization to happen
@@ -20016,8 +20035,13 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
// when the condition can be materialized as an all-ones register. Any
// single bit-test can be materialized as an all-ones register with
// shift-left and shift-right-arith.
+ // TODO: The operation legality checks could be loosened to include "custom",
+ // but that may cause regressions for targets that do not have shift
+ // instructions.
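+ // e.g. i32 (X & 8) == 0 ? 0 : C --> (and (sra (shl X, 28), 31), C),
+ // where the sra smears the tested bit into an all-ones or all-zeros mask.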
if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
- N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
+ N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2) &&
+ TLI.isOperationLegal(ISD::SHL, VT) &&
+ TLI.isOperationLegal(ISD::SRA, VT)) {
SDValue AndLHS = N0->getOperand(0);
auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
@@ -20209,7 +20233,10 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
/// =>
/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
/// does not require additional intermediate precision]
-SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
+/// For the last iteration, put numerator N into it to gain more precision:
+/// Result = N X_i + X_i (N - N A X_i)
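+/// i.e. the usual update scaled by N, so the quotient needs no separate
+/// final multiplication by N.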
+SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
+ SDNodeFlags Flags) {
if (Level >= AfterLegalizeDAG)
return SDValue();
@@ -20230,25 +20257,39 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
AddToWorklist(Est.getNode());
+ SDLoc DL(Op);
if (Iterations) {
- SDLoc DL(Op);
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
- // Newton iterations: Est = Est + Est (1 - Arg * Est)
+ // Newton iterations: Est = Est + Est (N - Arg * Est)
+ // If this is the last iteration, also multiply by the numerator.
for (int i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
+ SDValue MulEst = Est;
+
+ if (i == Iterations - 1) {
+ MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
+ AddToWorklist(MulEst.getNode());
+ }
+
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
AddToWorklist(NewEst.getNode());
- NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT,
+ (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
AddToWorklist(NewEst.getNode());
NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
AddToWorklist(NewEst.getNode());
- Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
+ Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
AddToWorklist(Est.getNode());
}
+ } else {
+ // If no iterations are available, multiply by N.
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
+ AddToWorklist(Est.getNode());
}
+
return Est;
}
@@ -20271,31 +20312,19 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
// We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
// this entire sequence requires only one FP constant.
SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
- AddToWorklist(HalfArg.getNode());
-
HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
- AddToWorklist(HalfArg.getNode());
// Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
- AddToWorklist(NewEst.getNode());
-
NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
- AddToWorklist(NewEst.getNode());
-
NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
- AddToWorklist(NewEst.getNode());
-
Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
- AddToWorklist(Est.getNode());
}
// If non-reciprocal square root is requested, multiply the result by Arg.
- if (!Reciprocal) {
+ if (!Reciprocal)
Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
- AddToWorklist(Est.getNode());
- }
return Est;
}
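The same loop in scalar form, a sketch under the assumption that ordinary float math models the DAG nodes:

    // One-constant Newton-Raphson for Est ~= 1/sqrt(Arg); HalfArg is built
    // as 1.5*Arg - Arg so that 1.5 is the only FP constant materialized.
    float rsqrtOneConst(float Arg, float Est, unsigned Iterations,
                        bool Reciprocal) {
      const float ThreeHalves = 1.5f;
      float HalfArg = ThreeHalves * Arg - Arg; // == 0.5 * Arg
      for (unsigned i = 0; i < Iterations; ++i)
        Est = Est * (ThreeHalves - HalfArg * Est * Est);
      return Reciprocal ? Est : Est * Arg; // sqrt(A) == A * rsqrt(A)
    }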
@@ -20321,13 +20350,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
// E = (E * -0.5) * ((A * E) * E + -3.0)
for (unsigned i = 0; i < Iterations; ++i) {
SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
- AddToWorklist(AE.getNode());
-
SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
- AddToWorklist(AEE.getNode());
-
SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
- AddToWorklist(RHS.getNode());
// When calculating a square root at the last iteration build:
// S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
@@ -20340,10 +20364,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
// SQRT: LHS = (A * E) * -0.5
LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
}
- AddToWorklist(LHS.getNode());
Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
- AddToWorklist(Est.getNode());
}
return Est;
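A scalar sketch of the two-constant variant (again assuming plain float math); note how the last iteration of the non-reciprocal form reuses A*E so the result is sqrt(A) directly, with no trailing multiply:

    float rsqrtTwoConst(float Arg, float Est, unsigned Iterations,
                        bool Reciprocal) {
      const float MinusThree = -3.0f, MinusHalf = -0.5f;
      for (unsigned i = 0; i < Iterations; ++i) {
        float AE = Arg * Est;
        float RHS = AE * Est + MinusThree;        // (A E) E + -3.0
        // RSQRT: LHS = E * -0.5;  last-step SQRT: LHS = (A E) * -0.5
        float LHS = (!Reciprocal && i + 1 == Iterations) ? AE * MinusHalf
                                                         : Est * MinusHalf;
        Est = LHS * RHS;
      }
      return Est;
    }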
@@ -20400,16 +20422,11 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
- AddToWorklist(Fabs.getNode());
- AddToWorklist(IsDenorm.getNode());
- AddToWorklist(Est.getNode());
} else {
// X == 0.0 ? 0.0 : Est
SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
- AddToWorklist(IsZero.getNode());
- AddToWorklist(Est.getNode());
}
}
}
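A scalar sketch of the guard whose worklist calls are being removed here, assuming std::numeric_limits<float>::min() plays the role of NormC (the smallest normalized value):

    #include <cmath>
    #include <limits>

    // rsqrt-based sqrt would compute sqrt(0) as 0 * inf = NaN, so tiny
    // inputs select 0.0 directly. With denormals flushed to zero, anything
    // below the smallest normal must be treated as zero as well.
    float guardSqrtEstimate(float Op, float Est, bool DenormsFlushedToZero) {
      if (DenormsFlushedToZero)
        return std::fabs(Op) < std::numeric_limits<float>::min() ? 0.0f : Est;
      return Op == 0.0f ? 0.0f : Est;
    }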
@@ -20432,6 +20449,7 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
struct MemUseCharacteristics {
bool IsVolatile;
+ bool IsAtomic;
SDValue BasePtr;
int64_t Offset;
Optional<int64_t> NumBytes;
@@ -20447,18 +20465,20 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
: (LSN->getAddressingMode() == ISD::PRE_DEC)
? -1 * C->getSExtValue()
: 0;
- return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/,
+ return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
+ Offset /*base offset*/,
Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
LSN->getMemOperand()};
}
if (const auto *LN = cast<LifetimeSDNode>(N))
- return {false /*isVolatile*/, LN->getOperand(1),
+ return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
(LN->hasOffset()) ? LN->getOffset() : 0,
(LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
: Optional<int64_t>(),
(MachineMemOperand *)nullptr};
// Default.
- return {false /*isvolatile*/, SDValue(), (int64_t)0 /*offset*/,
+ return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
+ (int64_t)0 /*offset*/,
Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
};
@@ -20474,6 +20494,11 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
if (MUC0.IsVolatile && MUC1.IsVolatile)
return true;
+  // Be conservative about atomics for the moment.
+ // TODO: This is way overconservative for unordered atomics (see D66309)
+ if (MUC0.IsAtomic && MUC1.IsAtomic)
+ return true;
+
if (MUC0.MMO && MUC1.MMO) {
if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
(MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
@@ -20555,7 +20580,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
// Get alias information for node.
- const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile();
+ // TODO: relax aliasing for unordered atomics (see D66309)
+ const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
// Starting off.
Chains.push_back(OriginalChain);
@@ -20571,8 +20597,9 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
case ISD::LOAD:
case ISD::STORE: {
// Get alias information for C.
+ // TODO: Relax aliasing for unordered atomics (see D66309)
bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
- !cast<LSBaseSDNode>(C.getNode())->isVolatile();
+ cast<LSBaseSDNode>(C.getNode())->isSimple();
if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
// Look further up the chain.
C = C.getOperand(0);
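The recurring strengthening of !isVolatile() into isSimple() in these hunks leans on MemSDNode::isSimple(), which in the headers of this vintage reduces to roughly:

    // A "simple" memory access is neither volatile nor atomic, so the new
    // checks reject atomics in addition to the volatiles rejected before.
    bool isSimple() const { return !isVolatile() && !isAtomic(); }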
@@ -20727,7 +20754,8 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
// If the chain has more than one use, then we can't reorder the mem ops.
if (!SDValue(Chain, 0)->hasOneUse())
break;
- if (Chain->isVolatile() || Chain->isIndexed())
+ // TODO: Relax for unordered atomics (see D66309)
+ if (!Chain->isSimple() || Chain->isIndexed())
break;
// Find the base pointer and offset for this memory node.
@@ -20795,11 +20823,11 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
CombineTo(St, TF);
- AddToWorklist(STChain);
- // Add TF operands worklist in reverse order.
- for (auto I = TF->getNumOperands(); I;)
- AddToWorklist(TF->getOperand(--I).getNode());
+ // Add TF and its operands to the worklist.
AddToWorklist(TF.getNode());
+ for (const SDValue &Op : TF->ops())
+ AddToWorklist(Op.getNode());
+ AddToWorklist(STChain);
return true;
}
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 22c23ba877e8..6d7260d7aee5 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -174,7 +174,7 @@ static unsigned findSinkableLocalRegDef(MachineInstr &MI) {
if (RegDef)
return 0;
RegDef = MO.getReg();
- } else if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ } else if (Register::isVirtualRegister(MO.getReg())) {
// This is another use of a vreg. Don't try to sink it.
return 0;
}
@@ -1213,14 +1213,13 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
if (!FrameAlign)
FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL);
Flags.setByValSize(FrameSize);
- Flags.setByValAlign(FrameAlign);
+ Flags.setByValAlign(Align(FrameAlign));
}
if (Arg.IsNest)
Flags.setNest();
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
- unsigned OriginalAlignment = DL.getABITypeAlignment(Arg.Ty);
- Flags.setOrigAlign(OriginalAlignment);
+ Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty)));
CLI.OutVals.push_back(Arg.Val);
CLI.OutFlags.push_back(Flags);
@@ -1237,8 +1236,8 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs);
// Set labels for heapallocsite call.
- if (CLI.CS && CLI.CS->getInstruction()->getMetadata("heapallocsite")) {
- MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite");
+ if (CLI.CS && CLI.CS->getInstruction()->hasMetadata("heapallocsite")) {
+ const MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite");
MF->addCodeViewHeapAllocSite(CLI.Call, MD);
}
@@ -1303,6 +1302,7 @@ bool FastISel::selectCall(const User *I) {
ExtraInfo |= InlineAsm::Extra_HasSideEffects;
if (IA->isAlignStack())
ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+ ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::INLINEASM))
@@ -1388,9 +1388,11 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
"Expected inlined-at fields to agree");
// A dbg.declare describes the address of a source variable, so lower it
// into an indirect DBG_VALUE.
+ auto *Expr = DI->getExpression();
+ Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true,
- *Op, DI->getVariable(), DI->getExpression());
+ TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ false,
+ *Op, DI->getVariable(), Expr);
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
@@ -1414,19 +1416,19 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
if (CI->getBitWidth() > 64)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addCImm(CI)
- .addImm(0U)
+ .addReg(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addImm(CI->getZExtValue())
- .addImm(0U)
+ .addReg(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
} else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addFPImm(CF)
- .addImm(0U)
+ .addReg(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
} else if (unsigned Reg = lookUpRegForValue(V)) {
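The common thread in these debug-info hunks: indirection moves out of the machine operand (the old addImm(0U) encoding) and into the expression itself. A minimal sketch of the pattern, assuming an in-tree build; DIExpression::append and dwarf::DW_OP_deref are existing APIs, while makeIndirectExpr is a hypothetical name:

    #include "llvm/BinaryFormat/Dwarf.h"
    #include "llvm/IR/DebugInfoMetadata.h"
    using namespace llvm;

    // Encode "the value lives behind this pointer" as an explicit deref in
    // the DIExpression; the second DBG_VALUE operand then always gets
    // addReg(0U) rather than addImm(0U).
    static const DIExpression *makeIndirectExpr(const DIExpression *Expr) {
      return DIExpression::append(Expr, {dwarf::DW_OP_deref});
    }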
@@ -1453,24 +1455,12 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel());
return true;
}
- case Intrinsic::objectsize: {
- ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1));
- unsigned long long Res = CI->isZero() ? -1ULL : 0;
- Constant *ResCI = ConstantInt::get(II->getType(), Res);
- unsigned ResultReg = getRegForValue(ResCI);
- if (!ResultReg)
- return false;
- updateValueMap(II, ResultReg);
- return true;
- }
- case Intrinsic::is_constant: {
- Constant *ResCI = ConstantInt::get(II->getType(), 0);
- unsigned ResultReg = getRegForValue(ResCI);
- if (!ResultReg)
- return false;
- updateValueMap(II, ResultReg);
- return true;
- }
+ case Intrinsic::objectsize:
+ llvm_unreachable("llvm.objectsize.* should have been lowered already");
+
+ case Intrinsic::is_constant:
+ llvm_unreachable("llvm.is.constant.* should have been lowered already");
+
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
case Intrinsic::expect: {
@@ -1677,11 +1667,11 @@ bool FastISel::selectInstruction(const Instruction *I) {
/// (fall-through) successor, and update the CFG.
void FastISel::fastEmitBranch(MachineBasicBlock *MSucc,
const DebugLoc &DbgLoc) {
- if (FuncInfo.MBB->getBasicBlock()->size() > 1 &&
+ if (FuncInfo.MBB->getBasicBlock()->sizeWithoutDebug() > 1 &&
FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
- // For more accurate line information if this is the only instruction
- // in the block then emit it, otherwise we have the unconditional
- // fall-through case, which needs no instructions.
+    // For more accurate line information, if this is the only non-debug
+    // instruction in the block, emit it; otherwise we have the
+    // unconditional fall-through case, which needs no instructions.
} else {
// The unconditional branch case.
TII.insertBranch(*FuncInfo.MBB, MSucc, nullptr,
@@ -2028,7 +2018,7 @@ unsigned FastISel::createResultReg(const TargetRegisterClass *RC) {
unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op,
unsigned OpNum) {
- if (TargetRegisterInfo::isVirtualRegister(Op)) {
+ if (Register::isVirtualRegister(Op)) {
const TargetRegisterClass *RegClass =
TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF);
if (!MRI.constrainRegClass(Op, RegClass)) {
@@ -2236,7 +2226,7 @@ unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0,
bool Op0IsKill, uint32_t Idx) {
unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
- assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+ assert(Register::isVirtualRegister(Op0) &&
"Cannot yet extract from physregs");
const TargetRegisterClass *RC = MRI.getRegClass(Op0);
MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx));
@@ -2417,10 +2407,9 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {
} else
return nullptr;
- bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal) != nullptr;
- bool IsInvariant = I->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
- bool IsDereferenceable =
- I->getMetadata(LLVMContext::MD_dereferenceable) != nullptr;
+ bool IsNonTemporal = I->hasMetadata(LLVMContext::MD_nontemporal);
+ bool IsInvariant = I->hasMetadata(LLVMContext::MD_invariant_load);
+ bool IsDereferenceable = I->hasMetadata(LLVMContext::MD_dereferenceable);
const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);
AAMDNodes AAInfo;
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 8b1759246b76..cf6711adad48 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -424,7 +425,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
unsigned BitWidth = IntVT.getSizeInBits();
unsigned DestReg = ValueMap[PN];
- if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+ if (!Register::isVirtualRegister(DestReg))
return;
LiveOutRegInfo.grow(DestReg);
LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg];
@@ -445,7 +446,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
assert(ValueMap.count(V) && "V should have been placed in ValueMap when its"
"CopyToReg node was created.");
unsigned SrcReg = ValueMap[V];
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ if (!Register::isVirtualRegister(SrcReg)) {
DestLOI.IsValid = false;
return;
}
@@ -480,7 +481,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
"its CopyToReg node was created.");
unsigned SrcReg = ValueMap[V];
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ if (!Register::isVirtualRegister(SrcReg)) {
DestLOI.IsValid = false;
return;
}
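The TargetRegisterInfo::isVirtualRegister to Register::isVirtualRegister renames running through these files are mechanical: Register wraps the raw unsigned id and also offers instance predicates. A minimal sketch, assuming an in-tree build:

    #include "llvm/CodeGen/Register.h"
    using namespace llvm;

    // Both spellings are equivalent; the instance form reads better once a
    // value is already held as a Register.
    static bool isVReg(unsigned RegNo) {
      Register R(RegNo);
      return R.isVirtual(); // same as Register::isVirtualRegister(RegNo)
    }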
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 9bc07d35dfc5..c5095995ec2e 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -71,7 +71,7 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1)))
continue;
if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1)))
- if (TargetRegisterInfo::isPhysicalRegister(RN->getReg()))
+ if (Register::isPhysicalRegister(RN->getReg()))
continue;
NumImpUses = N - I;
break;
@@ -86,7 +86,7 @@ void InstrEmitter::
EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
unsigned VRBase = 0;
- if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ if (Register::isVirtualRegister(SrcReg)) {
// Just use the input register directly!
SDValue Op(Node, ResNo);
if (IsClone)
@@ -114,7 +114,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
User->getOperand(2).getNode() == Node &&
User->getOperand(2).getResNo() == ResNo) {
unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ if (Register::isVirtualRegister(DestReg)) {
VRBase = DestReg;
Match = false;
} else if (DestReg != SrcReg)
@@ -139,7 +139,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
UseRC = RC;
else if (RC) {
const TargetRegisterClass *ComRC =
- TRI->getCommonSubClass(UseRC, RC, VT.SimpleTy);
+ TRI->getCommonSubClass(UseRC, RC);
// If multiple uses expect disjoint register classes, we emit
// copies in AddRegisterOperand.
if (ComRC)
@@ -219,7 +219,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
if (II.OpInfo[i].isOptionalDef()) {
// Optional def must be a physical register.
VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
- assert(TargetRegisterInfo::isPhysicalRegister(VRBase));
+ assert(Register::isPhysicalRegister(VRBase));
MIB.addReg(VRBase, RegState::Define);
}
@@ -229,7 +229,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
User->getOperand(2).getNode() == Node &&
User->getOperand(2).getResNo() == i) {
unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
const TargetRegisterClass *RegRC = MRI->getRegClass(Reg);
if (RegRC == RC) {
VRBase = Reg;
@@ -272,7 +272,7 @@ unsigned InstrEmitter::getVR(SDValue Op,
// does not include operand register class info.
const TargetRegisterClass *RC = TLI->getRegClassFor(
Op.getSimpleValueType(), Op.getNode()->isDivergent());
- unsigned VReg = MRI->createVirtualRegister(RC);
+ Register VReg = MRI->createVirtualRegister(RC);
BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
return VReg;
@@ -319,7 +319,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
if (!ConstrainedRC) {
OpRC = TRI->getAllocatableClass(OpRC);
assert(OpRC && "Constraints cannot be fulfilled for allocation");
- unsigned NewVReg = MRI->createVirtualRegister(OpRC);
+ Register NewVReg = MRI->createVirtualRegister(OpRC);
BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
VReg = NewVReg;
@@ -385,9 +385,8 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
(IIRC && TRI->isDivergentRegClass(IIRC)))
: nullptr;
- if (OpRC && IIRC && OpRC != IIRC &&
- TargetRegisterInfo::isVirtualRegister(VReg)) {
- unsigned NewVReg = MRI->createVirtualRegister(IIRC);
+ if (OpRC && IIRC && OpRC != IIRC && Register::isVirtualRegister(VReg)) {
+ Register NewVReg = MRI->createVirtualRegister(IIRC);
BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
VReg = NewVReg;
@@ -465,7 +464,7 @@ unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
// register instead.
RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx);
assert(RC && "No legal register class for VT supports that SubIdx");
- unsigned NewReg = MRI->createVirtualRegister(RC);
+ Register NewReg = MRI->createVirtualRegister(RC);
BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg)
.addReg(VReg);
return NewReg;
@@ -485,7 +484,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
if (User->getOpcode() == ISD::CopyToReg &&
User->getOperand(2).getNode() == Node) {
unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ if (Register::isVirtualRegister(DestReg)) {
VRBase = DestReg;
break;
}
@@ -503,7 +502,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
unsigned Reg;
MachineInstr *DefMI;
RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0));
- if (R && TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ if (R && Register::isPhysicalRegister(R->getReg())) {
Reg = R->getReg();
DefMI = nullptr;
} else {
@@ -529,7 +528,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
// Reg may not support a SubIdx sub-register, and we may need to
// constrain its register class or issue a COPY to a compatible register
// class.
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
Reg = ConstrainForSubReg(Reg, SubIdx,
Node->getOperand(0).getSimpleValueType(),
Node->isDivergent(), Node->getDebugLoc());
@@ -541,7 +540,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
MachineInstrBuilder CopyMI =
BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
TII->get(TargetOpcode::COPY), VRBase);
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
CopyMI.addReg(Reg, 0, SubIdx);
else
CopyMI.addReg(TRI->getSubReg(Reg, SubIdx));
@@ -614,7 +613,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
const TargetRegisterClass *DstRC =
TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx));
- unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+ Register NewVReg = MRI->createVirtualRegister(DstRC);
BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
NewVReg).addReg(VReg);
@@ -631,7 +630,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
bool IsClone, bool IsCloned) {
unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
- unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC));
+ Register NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC));
const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg);
unsigned NumOps = Node->getNumOperands();
@@ -649,7 +648,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1));
// Skip physical registers as they don't have a vreg to get and we'll
// insert copies for them in TwoAddressInstructionPass anyway.
- if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ if (!R || !Register::isPhysicalRegister(R->getReg())) {
unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
@@ -678,7 +677,7 @@ MachineInstr *
InstrEmitter::EmitDbgValue(SDDbgValue *SD,
DenseMap<SDValue, unsigned> &VRBaseMap) {
MDNode *Var = SD->getVariable();
- MDNode *Expr = SD->getExpression();
+ const DIExpression *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
@@ -702,12 +701,11 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
// EmitTargetCodeForFrameDebugValue is responsible for allocation.
auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE))
.addFrameIndex(SD->getFrameIx());
+
if (SD->isIndirect())
- // Push [fi + 0] onto the DIExpression stack.
- FrameMI.addImm(0);
- else
- // Push fi onto the DIExpression stack.
- FrameMI.addReg(0);
+ Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
+
+ FrameMI.addReg(0);
return FrameMI.addMetadata(Var).addMetadata(Expr);
}
// Otherwise, we're going to create an instruction here.
@@ -753,9 +751,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
// Indirect addressing is indicated by an Imm as the second parameter.
if (SD->isIndirect())
- MIB.addImm(0U);
- else
- MIB.addReg(0U, RegState::Debug);
+ Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
+
+ MIB.addReg(0U, RegState::Debug);
MIB.addMetadata(Var);
MIB.addMetadata(Expr);
@@ -928,12 +926,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
//
// Collect all the used physreg defs, and make sure that any unused physreg
// defs are marked as dead.
- SmallVector<unsigned, 8> UsedRegs;
+ SmallVector<Register, 8> UsedRegs;
// Additional results must be physical register defs.
if (HasPhysRegOuts) {
for (unsigned i = NumDefs; i < NumResults; ++i) {
- unsigned Reg = II.getImplicitDefs()[i - NumDefs];
+ Register Reg = II.getImplicitDefs()[i - NumDefs];
if (!Node->hasAnyUseOfValue(i))
continue;
// This implicitly defined physreg has a use.
@@ -960,8 +958,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// direct RegisterSDNode operands.
for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
- unsigned Reg = R->getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = R->getReg();
+ if (Reg.isPhysical())
UsedRegs.push_back(Reg);
}
}
@@ -995,8 +993,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
case ISD::CopyToReg: {
unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
SDValue SrcVal = Node->getOperand(2);
- if (TargetRegisterInfo::isVirtualRegister(DestReg) &&
- SrcVal.isMachineOpcode() &&
+ if (Register::isVirtualRegister(DestReg) && SrcVal.isMachineOpcode() &&
SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
// Instead building a COPY to that vreg destination, build an
// IMPLICIT_DEF instruction instead.
@@ -1093,16 +1090,18 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
// FIXME: Add dead flags for physical and virtual registers defined.
// For now, mark physical register defs as implicit to help fast
// regalloc. This makes inline asm look a lot like calls.
- MIB.addReg(Reg, RegState::Define |
- getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg)));
+ MIB.addReg(Reg,
+ RegState::Define |
+ getImplRegState(Register::isPhysicalRegister(Reg)));
}
break;
case InlineAsm::Kind_RegDefEarlyClobber:
case InlineAsm::Kind_Clobber:
for (unsigned j = 0; j != NumVals; ++j, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
- MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber |
- getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg)));
+ MIB.addReg(Reg,
+ RegState::Define | RegState::EarlyClobber |
+ getImplRegState(Register::isPhysicalRegister(Reg)));
ECRegs.push_back(Reg);
}
break;
@@ -1136,7 +1135,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
// then remove the early-clobber flag.
for (unsigned Reg : ECRegs) {
if (MIB->readsRegister(Reg, TRI)) {
- MachineOperand *MO =
+ MachineOperand *MO =
MIB->findRegisterDefOperand(Reg, false, false, TRI);
assert(MO && "No def operand for clobbered register?");
MO->setIsEarlyClobber(false);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index bf817f00f83d..f9fdf525240f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -161,6 +162,7 @@ private:
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
const SDLoc &dl, SDValue ChainIn);
SDValue ExpandBUILD_VECTOR(SDNode *Node);
+ SDValue ExpandSPLAT_VECTOR(SDNode *Node);
SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
SmallVectorImpl<SDValue> &Results);
@@ -236,6 +238,16 @@ public:
}
ReplacedNode(Old);
}
+
+ void ReplaceNodeWithValue(SDValue Old, SDValue New) {
+ LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
+ dbgs() << " with: "; New->dump(&DAG));
+
+ DAG.ReplaceAllUsesOfValueWith(Old, New);
+ if (UpdatedNodes)
+ UpdatedNodes->insert(New.getNode());
+ ReplacedNode(Old.getNode());
+ }
};
} // end anonymous namespace
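ReplaceNode swaps every result of a node at once; the new ReplaceNodeWithValue exists because strict FP nodes produce two results (value and chain) that expansion may replace with unrelated SDValues. The STRICT_FP_TO_UINT case later in this patch shows the intended pairing:

    // Tmp1 is the new value, Tmp2 the new chain (quoting that later hunk):
    DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2); // relink the chain
    ReplaceNodeWithValue(SDValue(Node, 0), Tmp1);          // replace the value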
@@ -493,8 +505,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// expand it.
EVT MemVT = ST->getMemoryVT();
const DataLayout &DL = DAG.getDataLayout();
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
- *ST->getMemOperand())) {
+ if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT,
+ *ST->getMemOperand())) {
LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n");
SDValue Result = TLI.expandUnalignedStore(ST, DAG);
ReplaceNode(SDValue(ST, 0), Result);
@@ -608,8 +620,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
EVT MemVT = ST->getMemoryVT();
// If this is an unaligned store and the target doesn't support it,
// expand it.
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
- *ST->getMemOperand())) {
+ if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT,
+ *ST->getMemOperand())) {
SDValue Result = TLI.expandUnalignedStore(ST, DAG);
ReplaceNode(SDValue(ST, 0), Result);
}
@@ -669,8 +681,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
const DataLayout &DL = DAG.getDataLayout();
// If this is an unaligned load and the target doesn't support it,
// expand it.
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
- *LD->getMemOperand())) {
+ if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT,
+ *LD->getMemOperand())) {
std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG);
}
break;
@@ -894,11 +906,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
if (SrcVT.getScalarType() == MVT::f16) {
EVT ISrcVT = SrcVT.changeTypeToInteger();
EVT IDestVT = DestVT.changeTypeToInteger();
- EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT());
+ EVT ILoadVT = TLI.getRegisterType(IDestVT.getSimpleVT());
- SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT,
- Chain, Ptr, ISrcVT,
- LD->getMemOperand());
+ SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, ILoadVT, Chain,
+ Ptr, ISrcVT, LD->getMemOperand());
Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result);
Chain = Result.getValue(1);
break;
@@ -959,15 +970,13 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
#ifndef NDEBUG
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
- assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
- TargetLowering::TypeLegal ||
- TLI.isTypeLegal(Node->getValueType(i))) &&
+ assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
+ TargetLowering::TypeLegal &&
"Unexpected illegal type!");
for (const SDValue &Op : Node->op_values())
assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) ==
TargetLowering::TypeLegal ||
- TLI.isTypeLegal(Op.getValueType()) ||
Op.getOpcode() == ISD::TargetConstant ||
Op.getOpcode() == ISD::Register) &&
"Unexpected illegal type!");
@@ -1004,7 +1013,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
- case ISD::FP_ROUND_INREG:
case ISD::SIGN_EXTEND_INREG: {
EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
@@ -1097,38 +1105,15 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
return;
}
break;
- case ISD::STRICT_FADD:
- case ISD::STRICT_FSUB:
- case ISD::STRICT_FMUL:
- case ISD::STRICT_FDIV:
- case ISD::STRICT_FREM:
- case ISD::STRICT_FSQRT:
- case ISD::STRICT_FMA:
- case ISD::STRICT_FPOW:
- case ISD::STRICT_FPOWI:
- case ISD::STRICT_FSIN:
- case ISD::STRICT_FCOS:
- case ISD::STRICT_FEXP:
- case ISD::STRICT_FEXP2:
- case ISD::STRICT_FLOG:
- case ISD::STRICT_FLOG10:
- case ISD::STRICT_FLOG2:
- case ISD::STRICT_FRINT:
- case ISD::STRICT_FNEARBYINT:
- case ISD::STRICT_FMAXNUM:
- case ISD::STRICT_FMINNUM:
- case ISD::STRICT_FCEIL:
- case ISD::STRICT_FFLOOR:
- case ISD::STRICT_FROUND:
- case ISD::STRICT_FTRUNC:
- case ISD::STRICT_FP_ROUND:
- case ISD::STRICT_FP_EXTEND:
- // These pseudo-ops get legalized as if they were their non-strict
- // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
- // is also legal, but if ISD::FSQRT requires expansion then so does
- // ISD::STRICT_FSQRT.
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ // These pseudo-ops are the same as the other STRICT_ ops except
+ // they are registered with setOperationAction() using the input type
+ // instead of the output type.
Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
- Node->getValueType(0));
+ Node->getOperand(1).getValueType());
break;
case ISD::SADDSAT:
case ISD::UADDSAT:
@@ -1139,7 +1124,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
- case ISD::UMULFIX: {
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
@@ -1650,7 +1636,6 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
MVT OpVT = LHS.getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
NeedInvert = false;
- bool NeedSwap = false;
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
default: llvm_unreachable("Unknown condition code action!");
case TargetLowering::Legal:
@@ -1664,6 +1649,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
return true;
}
// Swapping operands didn't work. Try inverting the condition.
+ bool NeedSwap = false;
InvCC = getSetCCInverse(CCCode, OpVT.isInteger());
if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
// If inverting the condition is not enough, try swapping operands
@@ -2021,6 +2007,14 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
return ExpandVectorBuildThroughStack(Node);
}
+SDValue SelectionDAGLegalize::ExpandSPLAT_VECTOR(SDNode *Node) {
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue SplatVal = Node->getOperand(0);
+
+ return DAG.getSplatBuildVector(VT, DL, SplatVal);
+}
+
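getSplatBuildVector is an existing SelectionDAG helper; for a fixed-width VT it amounts to roughly the following fragment (an assumed equivalence, not its verbatim source), which is why this expansion only applies once SPLAT_VECTOR carries a fixed-length type:

    // Repeat the scalar once per element and build a BUILD_VECTOR from it.
    SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), SplatVal);
    SDValue Splat = DAG.getBuildVector(VT, DL, Ops);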
// Expand a node into a call to a libcall. If the result value
// does not fit into a register, return the lo part and set the hi part to the
// by-reg argument. If it does fit into a single register, return the result
@@ -2074,12 +2068,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
if (!CallInfo.second.getNode()) {
- LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump());
+ LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump(&DAG));
// It's a tailcall, return the chain (which is the DAG root).
return DAG.getRoot();
}
- LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump());
+ LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump(&DAG));
return CallInfo.first;
}
@@ -2167,6 +2161,9 @@ SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node,
RTLIB::Libcall Call_F80,
RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128) {
+ if (Node->isStrictFPOpcode())
+ Node = DAG.mutateStrictFPToFP(Node);
+
RTLIB::Libcall LC;
switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
@@ -2815,6 +2812,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::STRICT_FP_ROUND:
+ // This expansion does not honor the "strict" properties anyway,
+ // so prefer falling back to the non-strict operation if legal.
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0))
+ == TargetLowering::Legal)
+ break;
Tmp1 = EmitStackConvert(Node->getOperand(1),
Node->getValueType(0),
Node->getValueType(0), dl, Node->getOperand(0));
@@ -2829,6 +2832,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
case ISD::STRICT_FP_EXTEND:
+ // This expansion does not honor the "strict" properties anyway,
+ // so prefer falling back to the non-strict operation if legal.
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0))
+ == TargetLowering::Legal)
+ break;
Tmp1 = EmitStackConvert(Node->getOperand(1),
Node->getOperand(1).getValueType(),
Node->getValueType(0), dl, Node->getOperand(0));
@@ -2873,19 +2882,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
}
- case ISD::FP_ROUND_INREG: {
- // The only way we can lower this is to turn it into a TRUNCSTORE,
- // EXTLOAD pair, targeting a temporary location (a stack slot).
-
- // NOTE: there is a choice here between constantly creating new stack
- // slots and always reusing the same one. We currently always create
- // new ones, as reuse may inhibit scheduling.
- EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
- Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT,
- Node->getValueType(0), dl);
- Results.push_back(Tmp1);
- break;
- }
case ISD::UINT_TO_FP:
if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) {
Results.push_back(Tmp1);
@@ -2901,33 +2897,26 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG))
Results.push_back(Tmp1);
break;
+ case ISD::STRICT_FP_TO_SINT:
+ if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) {
+ ReplaceNode(Node, Tmp1.getNode());
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_SINT node\n");
+ return true;
+ }
+ break;
case ISD::FP_TO_UINT:
- if (TLI.expandFP_TO_UINT(Node, Tmp1, DAG))
+ if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG))
Results.push_back(Tmp1);
break;
- case ISD::LROUND:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32,
- RTLIB::LROUND_F64, RTLIB::LROUND_F80,
- RTLIB::LROUND_F128,
- RTLIB::LROUND_PPCF128));
- break;
- case ISD::LLROUND:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32,
- RTLIB::LLROUND_F64, RTLIB::LLROUND_F80,
- RTLIB::LLROUND_F128,
- RTLIB::LLROUND_PPCF128));
- break;
- case ISD::LRINT:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32,
- RTLIB::LRINT_F64, RTLIB::LRINT_F80,
- RTLIB::LRINT_F128,
- RTLIB::LRINT_PPCF128));
- break;
- case ISD::LLRINT:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32,
- RTLIB::LLRINT_F64, RTLIB::LLRINT_F80,
- RTLIB::LLRINT_F128,
- RTLIB::LLRINT_PPCF128));
+ case ISD::STRICT_FP_TO_UINT:
+ if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG)) {
+ // Relink the chain.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node,1), Tmp2);
+ // Replace the new UINT result.
+ ReplaceNodeWithValue(SDValue(Node, 0), Tmp1);
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_UINT node\n");
+ return true;
+ }
break;
case ISD::VAARG:
Results.push_back(DAG.expandVAArg(Node));
@@ -3348,6 +3337,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
Results.push_back(TLI.expandFixedPointMul(Node, DAG));
break;
case ISD::ADDCARRY:
@@ -3662,6 +3652,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::BUILD_VECTOR:
Results.push_back(ExpandBUILD_VECTOR(Node));
break;
+ case ISD::SPLAT_VECTOR:
+ Results.push_back(ExpandSPLAT_VECTOR(Node));
+ break;
case ISD::SRA:
case ISD::SRL:
case ISD::SHL: {
@@ -3715,6 +3708,33 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
+ if (Results.empty() && Node->isStrictFPOpcode()) {
+ // FIXME: We were asked to expand a strict floating-point operation,
+ // but there is currently no expansion implemented that would preserve
+ // the "strict" properties. For now, we just fall back to the non-strict
+ // version if that is legal on the target. The actual mutation of the
+ // operation will happen in SelectionDAGISel::DoInstructionSelection.
+ switch (Node->getOpcode()) {
+ default:
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0))
+ == TargetLowering::Legal)
+ return true;
+ break;
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ // These are registered by the operand type instead of the value
+ // type. Reflect that here.
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getOperand(1).getValueType())
+ == TargetLowering::Legal)
+ return true;
+ break;
+ }
+ }
+
// Replace the original node with the legalized result.
if (Results.empty()) {
LLVM_DEBUG(dbgs() << "Cannot expand node\n");
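The operand-type lookup for the four L* ops matters because they return an integer: the floating-point type being legalized is the input, not the result. A hypothetical target would mark them accordingly in its TargetLowering constructor, e.g.:

    // Hypothetical target code; the FP operand type keys the action.
    setOperationAction(ISD::STRICT_LRINT,  MVT::f32, Legal);
    setOperationAction(ISD::STRICT_LROUND, MVT::f64, Expand);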
@@ -3956,6 +3976,34 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::POW_F80, RTLIB::POW_F128,
RTLIB::POW_PPCF128));
break;
+ case ISD::LROUND:
+ case ISD::STRICT_LROUND:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32,
+ RTLIB::LROUND_F64, RTLIB::LROUND_F80,
+ RTLIB::LROUND_F128,
+ RTLIB::LROUND_PPCF128));
+ break;
+ case ISD::LLROUND:
+ case ISD::STRICT_LLROUND:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32,
+ RTLIB::LLROUND_F64, RTLIB::LLROUND_F80,
+ RTLIB::LLROUND_F128,
+ RTLIB::LLROUND_PPCF128));
+ break;
+ case ISD::LRINT:
+ case ISD::STRICT_LRINT:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32,
+ RTLIB::LRINT_F64, RTLIB::LRINT_F80,
+ RTLIB::LRINT_F128,
+ RTLIB::LRINT_PPCF128));
+ break;
+ case ISD::LLRINT:
+ case ISD::STRICT_LLRINT:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32,
+ RTLIB::LLRINT_F64, RTLIB::LLRINT_F80,
+ RTLIB::LLRINT_F128,
+ RTLIB::LLRINT_PPCF128));
+ break;
case ISD::FDIV:
Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
RTLIB::DIV_F80, RTLIB::DIV_F128,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index b4849b2881e6..72d052473f11 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -42,10 +42,10 @@ static RTLIB::Libcall GetFPLibCall(EVT VT,
}
//===----------------------------------------------------------------------===//
-// Convert Float Results to Integer for Non-HW-supported Operations.
+// Convert Float Results to Integer
//===----------------------------------------------------------------------===//
-bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
+void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
dbgs() << "\n");
SDValue R = SDValue();
@@ -58,26 +58,18 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to soften the result of this operator!");
- case ISD::Register:
- case ISD::CopyFromReg:
- case ISD::CopyToReg:
- assert(isLegalInHWReg(N->getValueType(ResNo)) &&
- "Unsupported SoftenFloatRes opcode!");
- // Only when isLegalInHWReg, we can skip check of the operands.
- R = SDValue(N, ResNo);
- break;
case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
- case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N, ResNo); break;
+ case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
- case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break;
+ case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N); break;
case ISD::EXTRACT_VECTOR_ELT:
R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break;
- case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break;
+ case ISD::FABS: R = SoftenFloatRes_FABS(N); break;
case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break;
case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break;
case ISD::FADD: R = SoftenFloatRes_FADD(N); break;
case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
- case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N, ResNo); break;
+ case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break;
case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break;
case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break;
case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break;
@@ -89,7 +81,7 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FMA: R = SoftenFloatRes_FMA(N); break;
case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break;
case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break;
- case ISD::FNEG: R = SoftenFloatRes_FNEG(N, ResNo); break;
+ case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break;
@@ -102,30 +94,24 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
- case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break;
+ case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break;
case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
- case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break;
- case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break;
+ case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;
case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;
case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break;
}
- if (R.getNode() && R.getNode() != N) {
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode()) {
+ assert(R.getNode() != N);
SetSoftenedFloat(SDValue(N, ResNo), R);
- // Return true only if the node is changed, assuming that the operands
- // are also converted when necessary.
- return true;
}
-
- // Otherwise, return false to tell caller to scan operands.
- return false;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) {
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
+SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
return BitConvertToInteger(N->getOperand(0));
}
@@ -144,10 +130,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
BitConvertToInteger(N->getOperand(1)));
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) {
- // When LegalInHWReg, we can load better from the constant pool.
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
+SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N) {
ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
// In ppcf128, the high 64 bits are always first in memory regardless
// of Endianness. LLVM's APFloat representation is not Endian sensitive,
@@ -172,19 +155,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo) {
- // When LegalInHWReg, keep the extracted value in register.
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
NewOp.getValueType().getVectorElementType(),
NewOp, N->getOperand(1));
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) {
- // When LegalInHWReg, FABS can be implemented as native bitwise operations.
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
+SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned Size = NVT.getSizeInBits();
@@ -200,57 +177,69 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::FMIN_F32,
RTLIB::FMIN_F64,
RTLIB::FMIN_F80,
RTLIB::FMIN_F128,
RTLIB::FMIN_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::FMAX_F32,
RTLIB::FMAX_F64,
RTLIB::FMAX_F80,
RTLIB::FMAX_F128,
RTLIB::FMAX_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::ADD_F32,
RTLIB::ADD_F64,
RTLIB::ADD_F80,
RTLIB::ADD_F128,
RTLIB::ADD_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::CEIL_F32,
RTLIB::CEIL_F64,
RTLIB::CEIL_F80,
RTLIB::CEIL_F128,
RTLIB::CEIL_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) {
- // When LegalInHWReg, FCOPYSIGN can be implemented as native bitwise operations.
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
SDValue LHS = GetSoftenedFloat(N->getOperand(0));
SDValue RHS = BitConvertToInteger(N->getOperand(1));
SDLoc dl(N);
@@ -301,98 +290,123 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::COS_F32,
RTLIB::COS_F64,
RTLIB::COS_F80,
RTLIB::COS_F128,
RTLIB::COS_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::DIV_F32,
RTLIB::DIV_F64,
RTLIB::DIV_F80,
RTLIB::DIV_F128,
RTLIB::DIV_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::EXP_F32,
RTLIB::EXP_F64,
RTLIB::EXP_F80,
RTLIB::EXP_F128,
RTLIB::EXP_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::EXP2_F32,
RTLIB::EXP2_F64,
RTLIB::EXP2_F80,
RTLIB::EXP2_F128,
RTLIB::EXP2_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::FLOOR_F32,
RTLIB::FLOOR_F64,
RTLIB::FLOOR_F80,
RTLIB::FLOOR_F128,
RTLIB::FLOOR_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::LOG_F32,
RTLIB::LOG_F64,
RTLIB::LOG_F80,
RTLIB::LOG_F128,
RTLIB::LOG_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::LOG2_F32,
RTLIB::LOG2_F64,
RTLIB::LOG2_F80,
RTLIB::LOG2_F128,
RTLIB::LOG2_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::LOG10_F32,
RTLIB::LOG10_F64,
RTLIB::LOG10_F80,
RTLIB::LOG10_F128,
RTLIB::LOG10_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
@@ -400,48 +414,57 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)),
GetSoftenedFloat(N->getOperand(2)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[3] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType(),
+ N->getOperand(2).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::FMA_F32,
RTLIB::FMA_F64,
RTLIB::FMA_F80,
RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::MUL_F32,
RTLIB::MUL_F64,
RTLIB::MUL_F80,
RTLIB::MUL_F128,
RTLIB::MUL_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::NEARBYINT_F32,
RTLIB::NEARBYINT_F64,
RTLIB::NEARBYINT_F80,
RTLIB::NEARBYINT_F128,
RTLIB::NEARBYINT_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) {
- // When LegalInHWReg, FNEG can be implemented as native bitwise operations.
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
- EVT FloatVT = N->getValueType(ResNo);
+ EVT FloatVT = N->getValueType(0);
if (FloatVT == MVT::f32 || FloatVT == MVT::f64 || FloatVT == MVT::f128) {
// Expand Y = FNEG(X) -> Y = X ^ sign mask
APInt SignMask = APInt::getSignMask(NVT.getSizeInBits());
@@ -452,13 +475,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) {
// Expand Y = FNEG(X) -> Y = SUB -0.0, X
SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)),
GetSoftenedFloat(N->getOperand(0)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::SUB_F32,
RTLIB::SUB_F64,
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- NVT, Ops, false, dl).first;
+ NVT, Ops, CallOptions, dl).first;
}
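
The sign-mask expansion above is worth a concrete model. A minimal standalone sketch (plain C++, not part of the patch) of negating an f32 that has been softened into an i32:

    #include <cstdint>

    // IEEE-754 binary32 keeps the sign in bit 31, so once the float lives
    // in an integer register, Y = X ^ sign-mask flips exactly the sign.
    uint32_t soft_fneg_f32(uint32_t Bits) {
      return Bits ^ UINT32_C(0x80000000);
    }

The XOR form handles zeros, NaNs and infinities uniformly, which is why the legalizer only falls back to the SUB_F* libcall for types where the bitwise trick is not wired up.
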
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
@@ -485,7 +509,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
- return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first;
}
// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
@@ -493,15 +520,18 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32);
SDValue Op = N->getOperand(0);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op,
- false, SDLoc(N)).first;
+ CallOptions, SDLoc(N)).first;
if (N->getValueType(0) == MVT::f32)
return Res32;
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
- return TLI.makeLibCall(DAG, LC, NVT, Res32, false, SDLoc(N)).first;
+ return TLI.makeLibCall(DAG, LC, NVT, Res32, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
@@ -515,20 +545,27 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
- return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::POW_F32,
RTLIB::POW_F64,
RTLIB::POW_F80,
RTLIB::POW_F128,
RTLIB::POW_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
@@ -536,87 +573,111 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
"Unsupported power type!");
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::POWI_F32,
RTLIB::POWI_F64,
RTLIB::POWI_F80,
RTLIB::POWI_F128,
RTLIB::POWI_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::REM_F32,
RTLIB::REM_F64,
RTLIB::REM_F80,
RTLIB::REM_F128,
RTLIB::REM_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::RINT_F32,
RTLIB::RINT_F64,
RTLIB::RINT_F80,
RTLIB::RINT_F128,
RTLIB::RINT_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::ROUND_F32,
RTLIB::ROUND_F64,
RTLIB::ROUND_F80,
RTLIB::ROUND_F128,
RTLIB::ROUND_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::SIN_F32,
RTLIB::SIN_F64,
RTLIB::SIN_F80,
RTLIB::SIN_F128,
RTLIB::SIN_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::SQRT_F32,
RTLIB::SQRT_F64,
RTLIB::SQRT_F80,
RTLIB::SQRT_F128,
RTLIB::SQRT_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::SUB_F32,
RTLIB::SUB_F64,
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
@@ -625,17 +686,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::TRUNC_F32,
RTLIB::TRUNC_F64,
RTLIB::TRUNC_F80,
RTLIB::TRUNC_F128,
RTLIB::TRUNC_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) {
- bool LegalInHWReg = isLegalInHWReg(N->getValueType(ResNo));
+SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
LoadSDNode *L = cast<LoadSDNode>(N);
EVT VT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
@@ -666,23 +729,17 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) {
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL);
- if (LegalInHWReg)
- return ExtendNode;
return BitConvertToInteger(ExtendNode);
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo) {
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) {
SDValue LHS = GetSoftenedFloat(N->getOperand(1));
SDValue RHS = GetSoftenedFloat(N->getOperand(2));
return DAG.getSelect(SDLoc(N),
LHS.getValueType(), N->getOperand(0), LHS, RHS);
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo) {
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
SDValue LHS = GetSoftenedFloat(N->getOperand(2));
SDValue RHS = GetSoftenedFloat(N->getOperand(3));
return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
@@ -736,14 +793,18 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
// Sign/zero extend the argument if the libcall takes a larger type.
SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
NVT, N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(Signed);
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, LC,
TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
- Op, Signed, dl).first;
+ Op, CallOptions, dl).first;
}
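
The extend-before-call step above can be pictured with a scalar model. A hedged sketch (assumed i16 source and an i32 libcall parameter; the function name is illustrative only):

    #include <cstdint>

    float sint16_to_f32(int16_t X) {
      int32_t Wide = (int32_t)X; // SIGN_EXTEND because the op is signed;
                                 // an unsigned source would zero-extend
      return (float)Wide;        // stands in for the conversion libcall
    }

setSExt(Signed) records the same choice in CallOptions so makeLibCall knows how the argument was widened.
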
//===----------------------------------------------------------------------===//
-// Convert Float Operand to Integer for Non-HW-supported Operations.
+// Convert Float Operand to Integer
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
@@ -753,8 +814,6 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
switch (N->getOpcode()) {
default:
- if (CanSkipSoftenFloatOperand(N, OpNo))
- return false;
#ifndef NDEBUG
dbgs() << "SoftenFloatOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
@@ -762,11 +821,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
llvm_unreachable("Do not know how to soften this operator's operand!");
case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
- case ISD::CopyToReg: Res = SoftenFloatOp_COPY_TO_REG(N); break;
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
- case ISD::FABS: Res = SoftenFloatOp_FABS(N); break;
- case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break;
- case ISD::FNEG: Res = SoftenFloatOp_FNEG(N); break;
case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break;
case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
@@ -776,19 +831,9 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::LLROUND: Res = SoftenFloatOp_LLROUND(N); break;
case ISD::LRINT: Res = SoftenFloatOp_LRINT(N); break;
case ISD::LLRINT: Res = SoftenFloatOp_LLRINT(N); break;
- case ISD::SELECT: Res = SoftenFloatOp_SELECT(N); break;
case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
- case ISD::STORE:
- Res = SoftenFloatOp_STORE(N, OpNo);
- // Do not try to analyze or soften this node again if the value is
- // or can be held in a register. In that case, Res.getNode() should
- // be equal to N.
- if (Res.getNode() == N &&
- isLegalInHWReg(N->getOperand(OpNo).getValueType()))
- return false;
- // Otherwise, we need to reanalyze and lower the new Res nodes.
- break;
+ case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -800,60 +845,16 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
return true;
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
- "Invalid operand expansion");
+ "Invalid operand promotion");
ReplaceValueWith(SDValue(N, 0), Res);
return false;
}
-bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) {
- if (!isLegalInHWReg(N->getOperand(OpNo).getValueType()))
- return false;
-
- // When the operand type can be kept in registers there is nothing to do for
- // the following opcodes.
- switch (N->getOperand(OpNo).getOpcode()) {
- case ISD::BITCAST:
- case ISD::ConstantFP:
- case ISD::CopyFromReg:
- case ISD::CopyToReg:
- case ISD::FABS:
- case ISD::FCOPYSIGN:
- case ISD::FNEG:
- case ISD::Register:
- case ISD::SELECT:
- case ISD::SELECT_CC:
- return true;
- }
-
- switch (N->getOpcode()) {
- case ISD::ConstantFP: // Leaf node.
- case ISD::CopyFromReg: // Operand is a register that we know to be left
- // unchanged by SoftenFloatResult().
- case ISD::Register: // Leaf node.
- return true;
- }
- return false;
-}
-
SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
- return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0),
- GetSoftenedFloat(N->getOperand(0)));
-}
-
-SDValue DAGTypeLegalizer::SoftenFloatOp_COPY_TO_REG(SDNode *N) {
- SDValue Op1 = GetSoftenedFloat(N->getOperand(1));
- SDValue Op2 = GetSoftenedFloat(N->getOperand(2));
-
- if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2))
- return SDValue();
-
- if (N->getNumOperands() == 3)
- return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), 0);
+ SDValue Op0 = GetSoftenedFloat(N->getOperand(0));
- return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2,
- N->getOperand(3)),
- 0);
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0);
}
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) {
@@ -868,7 +869,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall");
- return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first;
}
@@ -885,7 +889,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
@@ -895,7 +902,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
EVT VT = NewLHS.getValueType();
NewLHS = GetSoftenedFloat(NewLHS);
NewRHS = GetSoftenedFloat(NewRHS);
- TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N));
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N),
+ N->getOperand(2), N->getOperand(3));
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
@@ -911,34 +919,6 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
0);
}
-SDValue DAGTypeLegalizer::SoftenFloatOp_FABS(SDNode *N) {
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
-
- if (Op == N->getOperand(0))
- return SDValue();
-
- return SDValue(DAG.UpdateNodeOperands(N, Op), 0);
-}
-
-SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) {
- SDValue Op0 = GetSoftenedFloat(N->getOperand(0));
- SDValue Op1 = GetSoftenedFloat(N->getOperand(1));
-
- if (Op0 == N->getOperand(0) && Op1 == N->getOperand(1))
- return SDValue();
-
- return SDValue(DAG.UpdateNodeOperands(N, Op0, Op1), 0);
-}
-
-SDValue DAGTypeLegalizer::SoftenFloatOp_FNEG(SDNode *N) {
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
-
- if (Op == N->getOperand(0))
- return SDValue();
-
- return SDValue(DAG.UpdateNodeOperands(N, Op), 0);
-}
-
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
bool Signed = N->getOpcode() == ISD::FP_TO_SINT;
EVT SVT = N->getOperand(0).getValueType();
@@ -962,23 +942,15 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, false, dl).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl).first;
// Truncate the result if the libcall returns a larger type.
return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res);
}
-SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT(SDNode *N) {
- SDValue Op1 = GetSoftenedFloat(N->getOperand(1));
- SDValue Op2 = GetSoftenedFloat(N->getOperand(2));
-
- if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2))
- return SDValue();
-
- return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2),
- 0);
-}
-
SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
@@ -986,7 +958,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
EVT VT = NewLHS.getValueType();
NewLHS = GetSoftenedFloat(NewLHS);
NewRHS = GetSoftenedFloat(NewRHS);
- TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N));
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N),
+ N->getOperand(0), N->getOperand(1));
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
@@ -1009,7 +982,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
EVT VT = NewLHS.getValueType();
NewLHS = GetSoftenedFloat(NewLHS);
NewRHS = GetSoftenedFloat(NewRHS);
- TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N));
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N),
+ N->getOperand(0), N->getOperand(1));
// If softenSetCCOperands returned a scalar, use it.
if (!NewRHS.getNode()) {
@@ -1047,13 +1021,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) {
SDValue Op = GetSoftenedFloat(N->getOperand(0));
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LROUND_F32,
RTLIB::LROUND_F64,
RTLIB::LROUND_F80,
RTLIB::LROUND_F128,
RTLIB::LROUND_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) {
@@ -1061,13 +1038,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) {
SDValue Op = GetSoftenedFloat(N->getOperand(0));
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LLROUND_F32,
RTLIB::LLROUND_F64,
RTLIB::LLROUND_F80,
RTLIB::LLROUND_F128,
RTLIB::LLROUND_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) {
@@ -1075,13 +1055,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) {
SDValue Op = GetSoftenedFloat(N->getOperand(0));
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LRINT_F32,
RTLIB::LRINT_F64,
RTLIB::LRINT_F80,
RTLIB::LRINT_F128,
RTLIB::LRINT_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) {
@@ -1089,13 +1072,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) {
SDValue Op = GetSoftenedFloat(N->getOperand(0));
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LLRINT_F32,
RTLIB::LLRINT_F64,
RTLIB::LLRINT_F80,
RTLIB::LLRINT_F128,
RTLIB::LLRINT_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
//===----------------------------------------------------------------------===//
@@ -1267,13 +1253,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ TargetLowering::MakeLibCallOptions CallOptions;
SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::DIV_F32,
RTLIB::DIV_F64,
RTLIB::DIV_F80,
RTLIB::DIV_F128,
RTLIB::DIV_PPCF128),
- N->getValueType(0), Ops, false,
+ N->getValueType(0), Ops, CallOptions,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1341,13 +1328,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ TargetLowering::MakeLibCallOptions CallOptions;
SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::FMA_F32,
RTLIB::FMA_F64,
RTLIB::FMA_F80,
RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
- N->getValueType(0), Ops, false,
+ N->getValueType(0), Ops, CallOptions,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1355,13 +1343,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ TargetLowering::MakeLibCallOptions CallOptions;
SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::MUL_F32,
RTLIB::MUL_F64,
RTLIB::MUL_F80,
RTLIB::MUL_F128,
RTLIB::MUL_PPCF128),
- N->getValueType(0), Ops, false,
+ N->getValueType(0), Ops, CallOptions,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1470,13 +1459,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ TargetLowering::MakeLibCallOptions CallOptions;
SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::SUB_F32,
RTLIB::SUB_F64,
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- N->getValueType(0), Ops, false,
+ N->getValueType(0), Ops, CallOptions,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1555,7 +1545,9 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
}
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
- Hi = TLI.makeLibCall(DAG, LC, VT, Src, true, dl).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ Hi = TLI.makeLibCall(DAG, LC, VT, Src, CallOptions, dl).first;
GetPairElements(Hi, Lo, Hi);
}
@@ -1732,7 +1724,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
- return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), CallOptions, dl).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
@@ -1741,8 +1734,9 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+ TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0),
- false, dl).first;
+ CallOptions, dl).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
@@ -1807,49 +1801,53 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) {
EVT RVT = N->getValueType(0);
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LROUND_F32,
RTLIB::LROUND_F64,
RTLIB::LROUND_F80,
RTLIB::LROUND_F128,
RTLIB::LROUND_PPCF128),
- RVT, N->getOperand(0), false, SDLoc(N)).first;
+ RVT, N->getOperand(0), CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) {
EVT RVT = N->getValueType(0);
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LLROUND_F32,
RTLIB::LLROUND_F64,
RTLIB::LLROUND_F80,
RTLIB::LLROUND_F128,
RTLIB::LLROUND_PPCF128),
- RVT, N->getOperand(0), false, SDLoc(N)).first;
+ RVT, N->getOperand(0), CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) {
EVT RVT = N->getValueType(0);
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LRINT_F32,
RTLIB::LRINT_F64,
RTLIB::LRINT_F80,
RTLIB::LRINT_F128,
RTLIB::LRINT_PPCF128),
- RVT, N->getOperand(0), false, SDLoc(N)).first;
+ RVT, N->getOperand(0), CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) {
EVT RVT = N->getValueType(0);
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LLRINT_F32,
RTLIB::LLRINT_F64,
RTLIB::LLRINT_F80,
RTLIB::LLRINT_F128,
RTLIB::LLRINT_PPCF128),
- RVT, N->getOperand(0), false, SDLoc(N)).first;
+ RVT, N->getOperand(0), CallOptions, SDLoc(N)).first;
}
//===----------------------------------------------------------------------===//
@@ -2002,6 +2000,12 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
dbgs() << "\n");
SDValue R = SDValue();
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) {
+ LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n");
+ return;
+ }
+
switch (N->getOpcode()) {
// These opcodes cannot appear if promotion of FP16 is done in the backend
// instead of Clang
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 15ac45c37c66..d5c1b539adbd 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -100,6 +100,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
Res = PromoteIntRes_BUILD_VECTOR(N); break;
case ISD::SCALAR_TO_VECTOR:
Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SPLAT_VECTOR:
+ Res = PromoteIntRes_SPLAT_VECTOR(N); break;
case ISD::CONCAT_VECTORS:
Res = PromoteIntRes_CONCAT_VECTORS(N); break;
@@ -112,6 +114,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;
@@ -148,9 +152,12 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break;
+
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
- case ISD::UMULFIX: Res = PromoteIntRes_MULFIX(N); break;
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break;
+
case ISD::ABS: Res = PromoteIntRes_ABS(N); break;
case ISD::ATOMIC_LOAD:
@@ -494,7 +501,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
NewOpc = ISD::FP_TO_SINT;
- SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
+ if (N->getOpcode() == ISD::STRICT_FP_TO_UINT &&
+ !TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) &&
+ TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
+ NewOpc = ISD::STRICT_FP_TO_SINT;
+
+ SDValue Res;
+ if (N->isStrictFPOpcode()) {
+ Res = DAG.getNode(NewOpc, dl, { NVT, MVT::Other },
+ { N->getOperand(0), N->getOperand(1) });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ } else
+ Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
// Assert that the converted value fits in the original type. If it doesn't
// (eg: because the value being converted is too big), then the result of the
@@ -503,7 +523,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
// NOTE: fp-to-uint to fp-to-sint promotion guarantees zero extend. For example:
// before legalization: fp-to-uint16, 65534. -> 0xfffe
// after legalization: fp-to-sint32, 65534. -> 0x0000fffe
- return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ?
+ return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT ||
+ N->getOpcode() == ISD::STRICT_FP_TO_UINT) ?
ISD::AssertZext : ISD::AssertSext, dl, NVT, Res,
DAG.getValueType(N->getValueType(0).getScalarType()));
}
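
The NOTE above can be verified numerically. A small standalone check (plain C++, assumed values) that promoting fp-to-uint16 through fp-to-sint32 leaves the low bits already zero-extended:

    #include <cstdint>

    uint16_t fptoui16_via_si32(float F) {
      int32_t Wide = (int32_t)F; // fp-to-sint32: 65534.0f becomes 0x0000FFFE
      return (uint16_t)Wide;     // truncating yields 0xFFFE, the u16 answer
    }

Because the sint32 result of an in-range u16 value always has its top 16 bits clear, the AssertZext emitted above is justified.
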
@@ -590,7 +611,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
N->getIndex(), N->getScale() };
SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
- N->getMemOperand());
+ N->getMemOperand(), N->getIndexType());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -623,48 +644,84 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
- // For promoting iN -> iM, this can be expanded by
- // 1. ANY_EXTEND iN to iM
- // 2. SHL by M-N
- // 3. [US][ADD|SUB]SAT
- // 4. L/ASHR by M-N
+ // If the promoted type is legal, we can convert this to:
+ // 1. ANY_EXTEND iN to iM
+ // 2. SHL by M-N
+ // 3. [US][ADD|SUB]SAT
+ // 4. L/ASHR by M-N
+ // Otherwise it is more efficient to convert this to a min and a max
+ // operation in the higher-precision arithmetic.
SDLoc dl(N);
SDValue Op1 = N->getOperand(0);
SDValue Op2 = N->getOperand(1);
unsigned OldBits = Op1.getScalarValueSizeInBits();
unsigned Opcode = N->getOpcode();
- unsigned ShiftOp;
- switch (Opcode) {
- case ISD::SADDSAT:
- case ISD::SSUBSAT:
- ShiftOp = ISD::SRA;
- break;
- case ISD::UADDSAT:
- case ISD::USUBSAT:
- ShiftOp = ISD::SRL;
- break;
- default:
- llvm_unreachable("Expected opcode to be signed or unsigned saturation "
- "addition or subtraction");
- }
-
- SDValue Op1Promoted = GetPromotedInteger(Op1);
- SDValue Op2Promoted = GetPromotedInteger(Op2);
+ SDValue Op1Promoted, Op2Promoted;
+ if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) {
+ Op1Promoted = ZExtPromotedInteger(Op1);
+ Op2Promoted = ZExtPromotedInteger(Op2);
+ } else {
+ Op1Promoted = SExtPromotedInteger(Op1);
+ Op2Promoted = SExtPromotedInteger(Op2);
+ }
EVT PromotedType = Op1Promoted.getValueType();
unsigned NewBits = PromotedType.getScalarSizeInBits();
- unsigned SHLAmount = NewBits - OldBits;
- EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
- SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT);
- Op1Promoted =
- DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
- Op2Promoted =
- DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
- SDValue Result =
- DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
- return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
+ if (TLI.isOperationLegalOrCustom(Opcode, PromotedType)) {
+ unsigned ShiftOp;
+ switch (Opcode) {
+ case ISD::SADDSAT:
+ case ISD::SSUBSAT:
+ ShiftOp = ISD::SRA;
+ break;
+ case ISD::UADDSAT:
+ case ISD::USUBSAT:
+ ShiftOp = ISD::SRL;
+ break;
+ default:
+ llvm_unreachable("Expected opcode to be signed or unsigned saturation "
+ "addition or subtraction");
+ }
+
+ unsigned SHLAmount = NewBits - OldBits;
+ EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
+ SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT);
+ Op1Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
+ Op2Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
+
+ SDValue Result =
+ DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
+ return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
+ } else {
+ if (Opcode == ISD::USUBSAT) {
+ SDValue Max =
+ DAG.getNode(ISD::UMAX, dl, PromotedType, Op1Promoted, Op2Promoted);
+ return DAG.getNode(ISD::SUB, dl, PromotedType, Max, Op2Promoted);
+ }
+
+ if (Opcode == ISD::UADDSAT) {
+ APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
+ SDValue Add =
+ DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);
+ return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax);
+ }
+
+ unsigned AddOp = Opcode == ISD::SADDSAT ? ISD::ADD : ISD::SUB;
+ APInt MinVal = APInt::getSignedMinValue(OldBits).sext(NewBits);
+ APInt MaxVal = APInt::getSignedMaxValue(OldBits).sext(NewBits);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
+ SDValue Result =
+ DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted);
+ Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax);
+ Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin);
+ return Result;
+ }
}
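
When the saturating opcode is not legal at the promoted width, the else branch above clamps with ordinary arithmetic instead. A hedged scalar model (assumed i8 promoted to i32, standalone C++) of the unsigned cases:

    #include <algorithm>
    #include <cstdint>

    uint8_t uaddsat8(uint8_t A, uint8_t B) {
      uint32_t Sum = uint32_t(A) + uint32_t(B);      // zext + ADD, cannot wrap
      return uint8_t(std::min<uint32_t>(Sum, 0xFF)); // UMIN against SatMax
    }

    uint8_t usubsat8(uint8_t A, uint8_t B) {
      uint32_t Max = std::max<uint32_t>(A, B); // UMAX with the subtrahend,
      return uint8_t(Max - B);                 // so A <= B yields 0
    }

For example, uaddsat8(200, 100) returns 255 instead of the wrapped 44; the signed cases work the same way with SMIN/SMAX against the signed extremes.
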
SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
@@ -673,6 +730,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
SDValue Op1Promoted, Op2Promoted;
bool Signed =
N->getOpcode() == ISD::SMULFIX || N->getOpcode() == ISD::SMULFIXSAT;
+ bool Saturating =
+ N->getOpcode() == ISD::SMULFIXSAT || N->getOpcode() == ISD::UMULFIXSAT;
if (Signed) {
Op1Promoted = SExtPromotedInteger(N->getOperand(0));
Op2Promoted = SExtPromotedInteger(N->getOperand(1));
@@ -685,7 +744,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
unsigned DiffSize =
PromotedType.getScalarSizeInBits() - OldType.getScalarSizeInBits();
- bool Saturating = N->getOpcode() == ISD::SMULFIXSAT;
if (Saturating) {
// Promoting the operand and result values changes the saturation width,
// which extends the values that we clamp to on saturation. This could be
@@ -1110,6 +1168,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break;
case ISD::SCALAR_TO_VECTOR:
Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::SPLAT_VECTOR:
+ Res = PromoteIntOp_SPLAT_VECTOR(N); break;
case ISD::VSELECT:
case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break;
case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
@@ -1148,7 +1208,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
- case ISD::UMULFIX: Res = PromoteIntOp_MULFIX(N); break;
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: Res = PromoteIntOp_MULFIX(N); break;
case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break;
@@ -1339,6 +1400,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
GetPromotedInteger(N->getOperand(0))), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_SPLAT_VECTOR(SDNode *N) {
+ // Integer SPLAT_VECTOR operands are implicitly truncated, so just promote the
+ // operand in place.
+ return SDValue(
+ DAG.UpdateNodeOperands(N, GetPromotedInteger(N->getOperand(0))), 0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
assert(OpNo == 0 && "Only know how to promote the condition!");
SDValue Cond = N->getOperand(0);
@@ -1454,8 +1522,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
EVT DataVT = N->getValueType(0);
NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
} else if (OpNo == 4) {
- // Need to sign extend the index since the bits will likely be used.
- NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
+ // The Index
+ if (N->isIndexSigned())
+ // Need to sign extend the index since the bits will likely be used.
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
+ else
+ NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));
} else
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
@@ -1470,8 +1542,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
EVT DataVT = N->getValue().getValueType();
NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
} else if (OpNo == 4) {
- // Need to sign extend the index since the bits will likely be used.
- NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
+ // The Index
+ if (N->isIndexSigned())
+ // Need to sign extend the index since the bits will likely be used.
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
+ else
+ NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));
} else
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
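
Both gather and scatter now derive the extension kind from the index's declared signedness, since widening must preserve the index value. A hedged scalar model (assumed i32 index promoted to i64):

    #include <cstdint>

    uint64_t promote_index(uint32_t IdxBits, bool IndexSigned) {
      // SExt keeps negative offsets negative; ZExt keeps large unsigned
      // offsets large. Choosing the wrong one silently corrupts addresses.
      return IndexSigned ? (uint64_t)(int64_t)(int32_t)IdxBits
                         : (uint64_t)IdxBits;
    }
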
@@ -1715,7 +1791,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
- case ISD::UMULFIX: ExpandIntRes_MULFIX(N, Lo, Hi); break;
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
@@ -2473,7 +2550,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, true/*irrelevant*/, dl).first,
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first,
Lo, Hi);
}
@@ -2488,7 +2567,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, false/*irrelevant*/, dl).first,
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first,
Lo, Hi);
}
@@ -2514,7 +2594,9 @@ void DAGTypeLegalizer::ExpandIntRes_LLROUND(SDNode *N, SDValue &Lo,
SDLoc dl(N);
EVT RetVT = N->getValueType(0);
- SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first,
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first,
Lo, Hi);
}
@@ -2540,7 +2622,9 @@ void DAGTypeLegalizer::ExpandIntRes_LLRINT(SDNode *N, SDValue &Lo,
SDLoc dl(N);
EVT RetVT = N->getValueType(0);
- SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first,
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first,
Lo, Hi);
}
@@ -2743,7 +2827,9 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
}
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first,
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first,
Lo, Hi);
}
@@ -2777,38 +2863,53 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
uint64_t Scale = N->getConstantOperandVal(2);
- bool Saturating = N->getOpcode() == ISD::SMULFIXSAT;
- EVT BoolVT = getSetCCResultType(VT);
- SDValue Zero = DAG.getConstant(0, dl, VT);
+ bool Saturating = (N->getOpcode() == ISD::SMULFIXSAT ||
+ N->getOpcode() == ISD::UMULFIXSAT);
+ bool Signed = (N->getOpcode() == ISD::SMULFIX ||
+ N->getOpcode() == ISD::SMULFIXSAT);
+
+ // Handle the special case when the scale is zero.
if (!Scale) {
SDValue Result;
if (!Saturating) {
Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
} else {
- Result = DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+ EVT BoolVT = getSetCCResultType(VT);
+ unsigned MulOp = Signed ? ISD::SMULO : ISD::UMULO;
+ Result = DAG.getNode(MulOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
SDValue Product = Result.getValue(0);
SDValue Overflow = Result.getValue(1);
-
- APInt MinVal = APInt::getSignedMinValue(VTSize);
- APInt MaxVal = APInt::getSignedMaxValue(VTSize);
- SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
- SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
- SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
- Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
- Result = DAG.getSelect(dl, VT, Overflow, Result, Product);
+ if (Signed) {
+ APInt MinVal = APInt::getSignedMinValue(VTSize);
+ APInt MaxVal = APInt::getSignedMaxValue(VTSize);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+ Result = DAG.getSelect(dl, VT, Overflow, Result, Product);
+ } else {
+ // For unsigned multiplication, we only need to check the max since we
+ // can't really overflow towards zero.
+ APInt MaxVal = APInt::getMaxValue(VTSize);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ Result = DAG.getSelect(dl, VT, Overflow, SatMax, Product);
+ }
}
SplitInteger(Result, Lo, Hi);
return;
}
+ // For SMULFIX[SAT] we only expect to find Scale < VTSize, but this assert
+ // also covers unhandled cases below, while still being valid for UMULFIX[SAT].
+ assert(Scale <= VTSize && "Scale can't be larger than the value type size.");
+
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue LL, LH, RL, RH;
GetExpandedInteger(LHS, LL, LH);
GetExpandedInteger(RHS, RL, RH);
SmallVector<SDValue, 4> Result;
- bool Signed = (N->getOpcode() == ISD::SMULFIX ||
- N->getOpcode() == ISD::SMULFIXSAT);
unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG,
TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
@@ -2822,19 +2923,9 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
"the size of the current value type");
EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
- // Shift whole amount by scale.
- SDValue ResultLL = Result[0];
- SDValue ResultLH = Result[1];
- SDValue ResultHL = Result[2];
- SDValue ResultHH = Result[3];
-
- SDValue SatMax, SatMin;
- SDValue NVTZero = DAG.getConstant(0, dl, NVT);
- SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT);
- EVT BoolNVT = getSetCCResultType(NVT);
-
- // After getting the multplication result in 4 parts, we need to perform a
+ // After getting the multiplication result in 4 parts, we need to perform a
// shift right by the amount of the scale to get the result in that scale.
+ //
// Let's say we multiply 2 64 bit numbers. The resulting value can be held in
// 128 bits that are cut into 4 32-bit parts:
//
@@ -2846,123 +2937,135 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
//
// |NVTSize-|
//
- // The resulting Lo and Hi will only need to be one of these 32-bit parts
- // after shifting.
+ // The resulting Lo and Hi would normally be in LL and LH after the shift. But
+ // to avoid unnecessary shifting of all 4 parts, we can adjust the shift
+ // amount and get Lo and Hi using two funnel shifts. Or for the special case
+ // when Scale is a multiple of NVTSize we can just pick the result without
+ // shifting.
+ uint64_t Part0 = Scale / NVTSize; // Part holding lowest bit needed.
+ if (Scale % NVTSize) {
+ SDValue ShiftAmount = DAG.getConstant(Scale % NVTSize, dl, ShiftTy);
+ Lo = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 1], Result[Part0],
+ ShiftAmount);
+ Hi = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 2], Result[Part0 + 1],
+ ShiftAmount);
+ } else {
+ Lo = Result[Part0];
+ Hi = Result[Part0 + 1];
+ }
+
+ // Unless saturation is requested we are done. The result is in <Hi,Lo>.
+ if (!Saturating)
+ return;
+
+ // Cannot overflow when there is no integer part.
+ if (Scale == VTSize)
+ return;
+
+ // To handle saturation we must check for overflow in the multiplication.
+ //
+ // Unsigned overflow happened if the upper (VTSize - Scale) bits (of Result)
+ // aren't all zeroes.
+ //
+ // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of Result)
+ // aren't all ones or all zeroes.
+ //
+ // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the
+ // highest bit of HH determines saturation direction in the event of signed
+ // saturation.
+
+ SDValue ResultHL = Result[2];
+ SDValue ResultHH = Result[3];
+
+ SDValue SatMax, SatMin;
+ SDValue NVTZero = DAG.getConstant(0, dl, NVT);
+ SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT);
+ EVT BoolNVT = getSetCCResultType(NVT);
+
+ if (!Signed) {
+ if (Scale < NVTSize) {
+ // Overflow happened if ((HH | (HL >> Scale)) != 0).
+ SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
+ DAG.getConstant(Scale, dl, ShiftTy));
+ SDValue Tmp = DAG.getNode(ISD::OR, dl, NVT, HLAdjusted, ResultHH);
+ SatMax = DAG.getSetCC(dl, BoolNVT, Tmp, NVTZero, ISD::SETNE);
+ } else if (Scale == NVTSize) {
+ // Overflow happened if (HH != 0).
+ SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETNE);
+ } else if (Scale < VTSize) {
+ // Overflow happened if ((HH >> (Scale - NVTSize)) != 0).
+ SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
+ DAG.getConstant(Scale - NVTSize, dl,
+ ShiftTy));
+ SatMax = DAG.getSetCC(dl, BoolNVT, HLAdjusted, NVTZero, ISD::SETNE);
+ } else
+ llvm_unreachable("Scale must be less or equal to VTSize for UMULFIXSAT"
+ "(and saturation can't happen with Scale==VTSize).");
+
+ Hi = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Hi);
+ Lo = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Lo);
+ return;
+ }
+
if (Scale < NVTSize) {
- // If the scale is less than the size of the VT we expand to, the Hi and
- // Lo of the result will be in the first 2 parts of the result after
- // shifting right. This only requires shifting by the scale as far as the
- // third part in the result (ResultHL).
- SDValue SRLAmnt = DAG.getConstant(Scale, dl, ShiftTy);
- SDValue SHLAmnt = DAG.getConstant(NVTSize - Scale, dl, ShiftTy);
- Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLL, SRLAmnt);
- Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
- DAG.getNode(ISD::SHL, dl, NVT, ResultLH, SHLAmnt));
- Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt);
- Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
- DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt));
-
- // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the
- // highest bit of HH determines saturation direction in the event of
- // saturation.
// The number of overflow bits we can check is VTSize - Scale + 1 (we
// include the sign bit). If these top bits are > 0, then we overflowed past
// the max value. If these top bits are < -1, then we overflowed past the
// min value. Otherwise, we did not overflow.
- if (Saturating) {
- unsigned OverflowBits = VTSize - Scale + 1;
- assert(OverflowBits <= VTSize && OverflowBits > NVTSize &&
- "Extent of overflow bits must start within HL");
- SDValue HLHiMask = DAG.getConstant(
- APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT);
- SDValue HLLoMask = DAG.getConstant(
- APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT);
-
- // HH > 0 or HH == 0 && HL > HLLoMask
- SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
- SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
- SDValue HLPos =
- DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT);
- SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos,
- DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLPos));
-
- // HH < -1 or HH == -1 && HL < HLHiMask
- SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
- SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
- SDValue HLNeg =
- DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT);
- SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg,
- DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLNeg));
- }
+ unsigned OverflowBits = VTSize - Scale + 1;
+ assert(OverflowBits <= VTSize && OverflowBits > NVTSize &&
+ "Extent of overflow bits must start within HL");
+ SDValue HLHiMask = DAG.getConstant(
+ APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT);
+ SDValue HLLoMask = DAG.getConstant(
+ APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT);
+ // We overflow max if HH > 0 or (HH == 0 && HL > HLLoMask).
+ SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
+ SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
+ SDValue HLUGT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT);
+ SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLUGT));
+ // We overflow min if HH < -1 or (HH == -1 && HL < HLHiMask).
+ SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
+ SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
+ SDValue HLULT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT);
+ SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLULT));
} else if (Scale == NVTSize) {
- // If the scales are equal, Lo and Hi are ResultLH and Result HL,
- // respectively. Avoid shifting to prevent undefined behavior.
- Lo = ResultLH;
- Hi = ResultHL;
-
- // We overflow max if HH > 0 or HH == 0 && HL sign bit is 1.
- // We overflow min if HH < -1 or HH == -1 && HL sign bit is 0.
- if (Saturating) {
- SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
- SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
- SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT);
- SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos,
- DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLNeg));
-
- SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
- SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
- SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE);
- SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg,
- DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLPos));
- }
+ // We overflow max if HH > 0 or (HH == 0 && HL sign bit is 1).
+ SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
+ SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
+ SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT);
+ SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLNeg));
+ // We overflow min if HH < -1 or (HH == -1 && HL sign bit is 0).
+ SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
+ SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
+ SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE);
+ SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLPos));
} else if (Scale < VTSize) {
- // If the scale is instead less than the old VT size, but greater than or
- // equal to the expanded VT size, the first part of the result (ResultLL) is
- // no longer a part of Lo because it would be scaled out anyway. Instead we
- // can start shifting right from the fourth part (ResultHH) to the second
- // part (ResultLH), and Result LH will be the new Lo.
- SDValue SRLAmnt = DAG.getConstant(Scale - NVTSize, dl, ShiftTy);
- SDValue SHLAmnt = DAG.getConstant(VTSize - Scale, dl, ShiftTy);
- Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt);
- Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
- DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt));
- Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, SRLAmnt);
- Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
- DAG.getNode(ISD::SHL, dl, NVT, ResultHH, SHLAmnt));
-
// This is similar to the case when we saturate if Scale < NVTSize, but we
- // only need to chech HH.
- if (Saturating) {
- unsigned OverflowBits = VTSize - Scale + 1;
- SDValue HHHiMask = DAG.getConstant(
- APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT);
- SDValue HHLoMask = DAG.getConstant(
- APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT);
-
- SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT);
- SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT);
- }
- } else if (Scale == VTSize) {
- assert(
- !Signed &&
- "Only unsigned types can have a scale equal to the operand bit width");
-
- Lo = ResultHL;
- Hi = ResultHH;
- } else {
- llvm_unreachable("Expected the scale to be less than or equal to the width "
- "of the operands");
- }
+ // only need to check HH.
+ unsigned OverflowBits = VTSize - Scale + 1;
+ SDValue HHHiMask = DAG.getConstant(
+ APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT);
+ SDValue HHLoMask = DAG.getConstant(
+ APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT);
+ SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT);
+ SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT);
+ } else
+ llvm_unreachable("Illegal scale for signed fixed point mul.");
- if (Saturating) {
- APInt LHMax = APInt::getSignedMaxValue(NVTSize);
- APInt LLMax = APInt::getAllOnesValue(NVTSize);
- APInt LHMin = APInt::getSignedMinValue(NVTSize);
- Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LHMax, dl, NVT), Hi);
- Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(LHMin, dl, NVT), Hi);
- Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LLMax, dl, NVT), Lo);
- Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo);
- }
+ // Saturate to signed maximum.
+ APInt MaxHi = APInt::getSignedMaxValue(NVTSize);
+ APInt MaxLo = APInt::getAllOnesValue(NVTSize);
+ Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxHi, dl, NVT), Hi);
+ Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxLo, dl, NVT), Lo);
+ // Saturate to signed minimum.
+ APInt MinHi = APInt::getSignedMinValue(NVTSize);
+ Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(MinHi, dl, NVT), Hi);
+ Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo);
}
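
The rewritten routine rests on one arithmetic fact: the double-width product fills four NVT-sized parts, and the scaled result is simply bits [Scale, Scale + VTSize) of that product, which the two funnel shifts extract without shifting all four parts. A hedged reference model for the 64-bit signed case (plain C++ using the Clang/GCC __int128 extension, where right-shifting a negative value is arithmetic):

    #include <cstdint>

    int64_t smulfix_i64(int64_t A, int64_t B, unsigned Scale) {
      __int128 Prod = (__int128)A * B; // the four-part product LL,LH,HL,HH
      return (int64_t)(Prod >> Scale); // VTSize bits starting at bit Scale
    }

For a Q47.16 layout, smulfix_i64(3 << 16, 2 << 16, 16) yields 6 << 16. The saturating variants additionally test whether the bits discarded above that window are a plain zero- (unsigned) or sign- (signed) extension of the result, and clamp when they are not.
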
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
@@ -3030,7 +3133,9 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
LC = RTLIB::SDIV_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
@@ -3129,7 +3234,9 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, isSigned, dl).first, Lo, Hi);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(isSigned);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
return;
}
@@ -3217,7 +3324,9 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
LC = RTLIB::SREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
@@ -3373,7 +3482,8 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
LC = RTLIB::UDIV_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
@@ -3399,7 +3509,8 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
LC = RTLIB::UREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
@@ -3759,7 +3870,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this SINT_TO_FP!");
- return TLI.makeLibCall(DAG, LC, DstVT, Op, true, SDLoc(N)).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -3924,7 +4037,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this UINT_TO_FP!");
- return TLI.makeLibCall(DAG, LC, DstVT, Op, true, dl).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, dl).first;
}
SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
@@ -4033,6 +4148,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_SPLAT_VECTOR(SDNode *N) {
+ SDLoc dl(N);
+
+ SDValue SplatVal = N->getOperand(0);
+
+ assert(!SplatVal.getValueType().isVector() && "Input must be a scalar");
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "Type must be promoted to a vector type");
+ EVT NOutElemVT = NOutVT.getVectorElementType();
+
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, SplatVal);
+
+ return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op);
+}
+
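
A condensed sketch of the new promotion with illustrative types (this paraphrases the function above; promoteSplat is a hypothetical free-standing name, not the committed one). A splat of an i8 scalar on a target that promotes i8 vector elements to i32 becomes a splat of the any-extended scalar:

    // splat_vector(i8 %x) : nxv4i8
    //   -> splat_vector(any_extend(%x) : i32) : nxv4i32
    SDValue promoteSplat(SelectionDAG &DAG, const TargetLowering &TLI,
                         SDValue SplatVal, EVT OutVT, const SDLoc &dl) {
      EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
      SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl,
                                NOutVT.getVectorElementType(), SplatVal);
      return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Ext);
    }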
SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
SDLoc dl(N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 14fd5be23ccb..b596c174a287 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -81,7 +81,6 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) {
SDValue Res(&Node, i);
- EVT VT = Res.getValueType();
bool Failed = false;
// Don't create a value in map.
auto ResId = (ValueToIdMap.count(Res)) ? ValueToIdMap[Res] : 0;
@@ -135,17 +134,13 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
dbgs() << "Unprocessed value in a map!";
Failed = true;
}
- } else if (isTypeLegal(VT) || IgnoreNodeResults(&Node)) {
+ } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) {
if (Mapped > 1) {
dbgs() << "Value with legal type was transformed!";
Failed = true;
}
} else {
- // If the value can be kept in HW registers, softening machinery can
- // leave it unchanged and don't put it to any map.
- if (Mapped == 0 &&
- !(getTypeAction(VT) == TargetLowering::TypeSoftenFloat &&
- isLegalInHWReg(VT))) {
+ if (Mapped == 0) {
dbgs() << "Processed value not in any map!";
Failed = true;
} else if (Mapped & (Mapped - 1)) {
@@ -257,13 +252,9 @@ bool DAGTypeLegalizer::run() {
Changed = true;
goto NodeDone;
case TargetLowering::TypeSoftenFloat:
- Changed = SoftenFloatResult(N, i);
- if (Changed)
- goto NodeDone;
- // If not changed, the result type should be legally in register.
- assert(isLegalInHWReg(ResultVT) &&
- "Unchanged SoftenFloatResult should be legal in register!");
- goto ScanOperands;
+ SoftenFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
case TargetLowering::TypeExpandFloat:
ExpandFloatResult(N, i);
Changed = true;
@@ -439,15 +430,9 @@ NodeDone:
bool Failed = false;
// Check that all result types are legal.
- // A value type is illegal if its TypeAction is not TypeLegal,
- // and TLI.RegClassForVT does not have a register class for this type.
- // For example, the x86_64 target has f128 that is not TypeLegal,
- // to have softened operators, but it also has FR128 register class to
- // pass and return f128 values. Hence a legalized node can have f128 type.
if (!IgnoreNodeResults(&Node))
for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i)
- if (!isTypeLegal(Node.getValueType(i)) &&
- !TLI.isTypeLegal(Node.getValueType(i))) {
+ if (!isTypeLegal(Node.getValueType(i))) {
dbgs() << "Result type " << i << " illegal: ";
Node.dump(&DAG);
Failed = true;
@@ -456,8 +441,7 @@ NodeDone:
// Check that all operand types are legal.
for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i)
if (!IgnoreNodeResults(Node.getOperand(i).getNode()) &&
- !isTypeLegal(Node.getOperand(i).getValueType()) &&
- !TLI.isTypeLegal(Node.getOperand(i).getValueType())) {
+ !isTypeLegal(Node.getOperand(i).getValueType())) {
dbgs() << "Operand type " << i << " illegal: ";
Node.getOperand(i).dump(&DAG);
Failed = true;
@@ -713,23 +697,13 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
}
void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
- // f128 of x86_64 could be kept in SSE registers,
- // but sometimes softened to i128.
- assert((Result.getValueType() ==
- TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) ||
- Op.getValueType() ==
- TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) &&
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
"Invalid type for softened float");
AnalyzeNewValue(Result);
auto &OpIdEntry = SoftenedFloats[getTableId(Op)];
- // Allow repeated calls to save f128 type nodes
- // or any node with type that transforms to itself.
- // Many operations on these types are not softened.
- assert(((OpIdEntry == 0) ||
- Op.getValueType() ==
- TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) &&
- "Node is already converted to integer!");
+ assert((OpIdEntry == 0) && "Node is already converted to integer!");
OpIdEntry = getTableId(Result);
}
@@ -1003,25 +977,27 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
/// Convert the node into a libcall with the same prototype.
SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
bool isSigned) {
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(isSigned);
unsigned NumOps = N->getNumOperands();
SDLoc dl(N);
if (NumOps == 0) {
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, isSigned,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, CallOptions,
dl).first;
} else if (NumOps == 1) {
SDValue Op = N->getOperand(0);
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, isSigned,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, CallOptions,
dl).first;
} else if (NumOps == 2) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions,
dl).first;
}
SmallVector<SDValue, 8> Ops(NumOps);
for (unsigned i = 0; i < NumOps; ++i)
Ops[i] = N->getOperand(i);
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first;
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, dl).first;
}
/// Expand a node into a call to a libcall. Similar to ExpandLibCall except that
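
The recurring change in this file and in the LegalizeIntegerTypes.cpp hunks above swaps makeLibCall's bare isSigned flag for a MakeLibCallOptions struct. A minimal sketch of the new call pattern, mirroring ExpandIntRes_SDIV (Ops, VT, Lo and Hi are assumed from that context):

    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(true); // sign-extend the i128 division operands
    std::pair<SDValue, SDValue> Res =
        TLI.makeLibCall(DAG, RTLIB::SDIV_I128, VT, Ops, CallOptions, dl);
    SplitInteger(Res.first, Lo, Hi); // Res.second is the output chain

Unsigned callers simply leave the default-constructed options untouched, as the UDIV and UREM hunks show.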
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 1d489b1b3a33..4afbae69128a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -73,15 +73,6 @@ private:
return VT.isSimple() && TLI.isTypeLegal(VT);
}
- /// Return true if this type can be passed in registers.
- /// For example, x86_64's f128, should to be legally in registers
- /// and only some operations converted to library calls or integer
- /// bitwise operations.
- bool isLegalInHWReg(EVT VT) const {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
- return VT == NVT && isSimpleLegalType(VT);
- }
-
EVT getSetCCResultType(EVT VT) const {
return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
}
@@ -306,6 +297,7 @@ private:
SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N);
SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N);
SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
@@ -363,6 +355,7 @@ private:
SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N);
SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
@@ -472,14 +465,11 @@ private:
// Float to Integer Conversion Support: LegalizeFloatTypes.cpp
//===--------------------------------------------------------------------===//
- /// Given an operand Op of Float type, returns the integer if the Op is not
- /// supported in target HW and converted to the integer.
- /// The integer contains exactly the same bits as Op - only the type changed.
- /// For example, if Op is an f32 which was softened to an i32, then this
- /// method returns an i32, the bits of which coincide with those of Op.
- /// If the Op can be efficiently supported in target HW or the operand must
- /// stay in a register, the Op is not converted to an integer.
- /// In that case, the given op is returned.
+ /// GetSoftenedFloat - Given a processed operand Op which was converted to an
+ /// integer of the same size, this returns the integer. The integer contains
+ /// exactly the same bits as Op - only the type changed. For example, if Op
+ /// is an f32 which was softened to an i32, then this method returns an i32,
+ /// the bits of which coincide with those of Op.
SDValue GetSoftenedFloat(SDValue Op) {
TableId Id = getTableId(Op);
auto Iter = SoftenedFloats.find(Id);
@@ -494,19 +484,19 @@ private:
}
void SetSoftenedFloat(SDValue Op, SDValue Result);
- // Convert Float Results to Integer for Non-HW-supported Operations.
- bool SoftenFloatResult(SDNode *N, unsigned ResNo);
+ // Convert Float Results to Integer.
+ void SoftenFloatResult(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
- SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_BITCAST(SDNode *N);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
- SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_ConstantFP(SDNode *N);
SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo);
- SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_FABS(SDNode *N);
SDValue SoftenFloatRes_FMINNUM(SDNode *N);
SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
SDValue SoftenFloatRes_FADD(SDNode *N);
SDValue SoftenFloatRes_FCEIL(SDNode *N);
- SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
SDValue SoftenFloatRes_FCOS(SDNode *N);
SDValue SoftenFloatRes_FDIV(SDNode *N);
SDValue SoftenFloatRes_FEXP(SDNode *N);
@@ -518,7 +508,7 @@ private:
SDValue SoftenFloatRes_FMA(SDNode *N);
SDValue SoftenFloatRes_FMUL(SDNode *N);
SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
- SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_FNEG(SDNode *N);
SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N);
SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
@@ -531,27 +521,17 @@ private:
SDValue SoftenFloatRes_FSQRT(SDNode *N);
SDValue SoftenFloatRes_FSUB(SDNode *N);
SDValue SoftenFloatRes_FTRUNC(SDNode *N);
- SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo);
- SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo);
- SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_LOAD(SDNode *N);
+ SDValue SoftenFloatRes_SELECT(SDNode *N);
+ SDValue SoftenFloatRes_SELECT_CC(SDNode *N);
SDValue SoftenFloatRes_UNDEF(SDNode *N);
SDValue SoftenFloatRes_VAARG(SDNode *N);
SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
- // Return true if we can skip softening the given operand or SDNode because
- // either it was soften before by SoftenFloatResult and references to the
- // operand were replaced by ReplaceValueWith or it's value type is legal in HW
- // registers and the operand can be left unchanged.
- bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo);
-
- // Convert Float Operand to Integer for Non-HW-supported Operations.
+ // Convert Float Operand to Integer.
bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
SDValue SoftenFloatOp_BITCAST(SDNode *N);
- SDValue SoftenFloatOp_COPY_TO_REG(SDNode *N);
SDValue SoftenFloatOp_BR_CC(SDNode *N);
- SDValue SoftenFloatOp_FABS(SDNode *N);
- SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N);
- SDValue SoftenFloatOp_FNEG(SDNode *N);
SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
@@ -559,7 +539,6 @@ private:
SDValue SoftenFloatOp_LLROUND(SDNode *N);
SDValue SoftenFloatOp_LRINT(SDNode *N);
SDValue SoftenFloatOp_LLRINT(SDNode *N);
- SDValue SoftenFloatOp_SELECT(SDNode *N);
SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
SDValue SoftenFloatOp_SETCC(SDNode *N);
SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
@@ -715,6 +694,7 @@ private:
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_BITCAST(SDNode *N);
SDValue ScalarizeVecOp_UnaryOp(SDNode *N);
+ SDValue ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N);
SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecOp_VSELECT(SDNode *N);
@@ -830,6 +810,7 @@ private:
SDValue WidenVecRes_Ternary(SDNode *N);
SDValue WidenVecRes_Binary(SDNode *N);
SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
+ SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N);
SDValue WidenVecRes_StrictFP(SDNode *N);
SDValue WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo);
SDValue WidenVecRes_Convert(SDNode *N);
@@ -933,6 +914,8 @@ private:
void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVSETCC(const SDNode *N);
+
//===--------------------------------------------------------------------===//
// Generic Expansion: LegalizeTypesGeneric.cpp
//===--------------------------------------------------------------------===//
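
These header deletions remove the f128-kept-in-register special case: after this commit a softened value is always rewritten as the same-width integer. As a hedged, C-level illustration of what softening means in practice (the helper is compiler-rt's, not part of this patch):

    // On a soft-float target an f32 add becomes a libcall; the operands
    // and result travel as raw 32-bit patterns in integer registers.
    extern "C" float __addsf3(float a, float b); // compiler-rt / libgcc
    float softAdd(float a, float b) { return __addsf3(a, b); }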
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 943f63f46c47..5562f400b6e1 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -52,17 +52,11 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
case TargetLowering::TypePromoteFloat:
llvm_unreachable("Bitcast of a promotion-needing float should never need "
"expansion");
- case TargetLowering::TypeSoftenFloat: {
- // Expand the floating point operand only if it was converted to integers.
- // Otherwise, it is a legal type like f128 that can be saved in a register.
- auto SoftenedOp = GetSoftenedFloat(InOp);
- if (isLegalInHWReg(SoftenedOp.getValueType()))
- break;
- SplitInteger(SoftenedOp, Lo, Hi);
+ case TargetLowering::TypeSoftenFloat:
+ SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
- }
case TargetLowering::TypeExpandInteger:
case TargetLowering::TypeExpandFloat: {
auto &DL = DAG.getDataLayout();
@@ -509,23 +503,6 @@ void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
GetSplitOp(Op, Lo, Hi);
}
-static std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N,
- SelectionDAG &DAG) {
- SDLoc DL(N);
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
-
- // Split the inputs.
- SDValue Lo, Hi, LL, LH, RL, RH;
- std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
- std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
-
- Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
- Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
-
- return std::make_pair(Lo, Hi);
-}
-
void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue LL, LH, RL, RH, CL, CH;
SDLoc dl(N);
@@ -537,16 +514,25 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) {
if (Cond.getValueType().isVector()) {
if (SDValue Res = WidenVSELECTAndMask(N))
std::tie(CL, CH) = DAG.SplitVector(Res->getOperand(0), dl);
- // It seems to improve code to generate two narrow SETCCs as opposed to
- // splitting a wide result vector.
- else if (Cond.getOpcode() == ISD::SETCC)
- std::tie(CL, CH) = SplitVSETCC(Cond.getNode(), DAG);
// Check if there are already splitted versions of the vector available and
// use those instead of splitting the mask operand again.
else if (getTypeAction(Cond.getValueType()) ==
TargetLowering::TypeSplitVector)
GetSplitVector(Cond, CL, CH);
- else
+ // It seems to improve code to generate two narrow SETCCs as opposed to
+ // splitting a wide result vector.
+ else if (Cond.getOpcode() == ISD::SETCC) {
+ // If the condition is a vXi1 vector, and the LHS of the setcc is a legal
+ // type and the setcc result type is the same vXi1, then leave the setcc
+ // alone.
+ EVT CondLHSVT = Cond.getOperand(0).getValueType();
+ if (Cond.getValueType().getVectorElementType() == MVT::i1 &&
+ isTypeLegal(CondLHSVT) &&
+ getSetCCResultType(CondLHSVT) == Cond.getValueType())
+ std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
+ else
+ SplitVecRes_SETCC(Cond.getNode(), CL, CH);
+ } else
std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 10b8b705869e..15c3a0b6cfad 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -38,6 +38,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
@@ -333,14 +334,27 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::STRICT_FP_ROUND:
case ISD::STRICT_FP_EXTEND:
- // These pseudo-ops get legalized as if they were their non-strict
- // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
- // is also legal, but if ISD::FSQRT requires expansion then so does
- // ISD::STRICT_FSQRT.
- Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
- Node->getValueType(0));
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ // If we're asked to expand a strict vector floating-point operation,
+ // by default we're going to simply unroll it. That is usually the
+ // best approach, except in the case where the resulting strict (scalar)
+ // operations would themselves use the fallback mutation to non-strict.
+ // In that specific case, just do the fallback on the vector op.
+ if (Action == TargetLowering::Expand &&
+ TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0))
+ == TargetLowering::Legal) {
+ EVT EltVT = Node->getValueType(0).getVectorElementType();
+ if (TLI.getOperationAction(Node->getOpcode(), EltVT)
+ == TargetLowering::Expand &&
+ TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
+ == TargetLowering::Legal)
+ Action = TargetLowering::Legal;
+ }
break;
case ISD::ADD:
case ISD::SUB:
@@ -439,16 +453,13 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
break;
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
- case ISD::UMULFIX: {
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
break;
}
- case ISD::FP_ROUND_INREG:
- Action = TLI.getOperationAction(Node->getOpcode(),
- cast<VTSDNode>(Node->getOperand(1))->getVT());
- break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::VECREDUCE_ADD:
@@ -820,6 +831,13 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
case ISD::SMULFIX:
case ISD::UMULFIX:
return ExpandFixedPointMul(Op);
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIXSAT:
+ // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, and it is not
+ // clear exactly why. Perhaps the expansion produces worse codegen than the
+ // unroll on some targets? This should be investigated, and if unrolling
+ // remains the preferred choice, the reason should be documented here.
+ return DAG.UnrollVectorOp(Op.getNode());
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
@@ -844,6 +862,8 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
return ExpandStrictFPOp(Op);
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
@@ -1168,9 +1188,13 @@ SDValue VectorLegalizer::ExpandABS(SDValue Op) {
SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) {
// Attempt to expand using TargetLowering.
- SDValue Result;
- if (TLI.expandFP_TO_UINT(Op.getNode(), Result, DAG))
+ SDValue Result, Chain;
+ if (TLI.expandFP_TO_UINT(Op.getNode(), Result, Chain, DAG)) {
+ if (Op.getNode()->isStrictFPOpcode())
+ // Relink the chain
+ DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Chain);
return Result;
+ }
// Otherwise go ahead and unroll.
return DAG.UnrollVectorOp(Op.getNode());
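
For intuition, what TLI.expandFP_TO_UINT produces is, in spirit, the classic expansion via the signed conversion. A hedged scalar sketch for f64 to u64 (not the exact DAG the hook builds):

    #include <cstdint>
    // Values below 2^63 convert directly; larger values are biased down
    // before the signed conversion and the sign bit is restored afterwards.
    uint64_t fptoui64(double X) {
      const double Two63 = 9223372036854775808.0; // 2^63, exact in double
      if (X < Two63)
        return (uint64_t)(int64_t)X;
      return (uint64_t)(int64_t)(X - Two63) ^ 0x8000000000000000ULL;
    }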
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 7e4d52617977..3763e886cef2 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -52,7 +52,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break;
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
- case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break;
case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
@@ -171,6 +170,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::STRICT_FP_EXTEND:
R = ScalarizeVecRes_StrictFPOp(N);
break;
@@ -185,6 +186,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
R = ScalarizeVecRes_MULFIX(N);
break;
}
@@ -604,6 +606,10 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::UINT_TO_FP:
Res = ScalarizeVecOp_UnaryOp(N);
break;
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ Res = ScalarizeVecOp_UnaryOp_StrictFP(N);
+ break;
case ISD::CONCAT_VECTORS:
Res = ScalarizeVecOp_CONCAT_VECTORS(N);
break;
@@ -679,6 +685,23 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op);
}
+/// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
+/// Do the strict FP operation on the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) {
+ assert(N->getValueType(0).getVectorNumElements() == 1 &&
+ "Unexpected vector type!");
+ SDValue Elt = GetScalarizedVector(N->getOperand(1));
+ SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N),
+ { N->getValueType(0).getScalarType(), MVT::Other },
+ { N->getOperand(0), Elt });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ // Revectorize the result so the types line up with what the uses of this
+ // expression expect.
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+}
+
/// The vectors to concatenate have length one - use a BUILD_VECTOR instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
SmallVector<SDValue, 8> Ops(N->getNumOperands());
@@ -828,7 +851,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
- case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
@@ -883,7 +905,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND:
case ISD::FP_TO_SINT:
+ case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::FRINT:
case ISD::FROUND:
case ISD::FSIN:
@@ -977,6 +1001,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
SplitVecRes_MULFIX(N, Lo, Hi);
break;
}
@@ -1560,10 +1585,14 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
// Split Mask operand
SDValue MaskLo, MaskHi;
- if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(Mask, MaskLo, MaskHi);
- else
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ if (Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ }
EVT MemoryVT = MLD->getMemoryVT();
EVT LoMemVT, HiMemVT;
@@ -1622,10 +1651,14 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
// Split Mask operand
SDValue MaskLo, MaskHi;
- if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(Mask, MaskLo, MaskHi);
- else
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ if (Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ }
EVT MemoryVT = MGT->getMemoryVT();
EVT LoMemVT, HiMemVT;
@@ -1651,11 +1684,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo,
- MMO);
+ MMO, MGT->getIndexType());
SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi,
- MMO);
+ MMO, MGT->getIndexType());
// Build a factor node to remember that this load is independent of the
// other one.
@@ -1979,6 +2012,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
break;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::CTTZ:
case ISD::CTLZ:
case ISD::CTPOP:
@@ -2293,7 +2328,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl,
- OpsLo, MMO);
+ OpsLo, MMO, MGT->getIndexType());
MMO = DAG.getMachineFunction().
getMachineMemOperand(MGT->getPointerInfo(),
@@ -2303,7 +2338,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl,
- OpsHi, MMO);
+ OpsHi, MMO, MGT->getIndexType());
// Build a factor node to remember that this load is independent of the
// other one.
@@ -2340,12 +2375,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
else
std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+ // Split Mask operand
SDValue MaskLo, MaskHi;
- if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
- // Split Mask operand
- GetSplitVector(Mask, MaskLo, MaskHi);
- else
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ }
SDValue Lo, Hi;
MachineMemOperand *MMO = DAG.getMachineFunction().
@@ -2397,12 +2436,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
else
std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+ // Split Mask operand
SDValue MaskLo, MaskHi;
- if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
- // Split Mask operand
- GetSplitVector(Mask, MaskLo, MaskHi);
- else
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ }
SDValue IndexHi, IndexLo;
if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
@@ -2418,7 +2461,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale};
Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
- DL, OpsLo, MMO);
+ DL, OpsLo, MMO, N->getIndexType());
MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
@@ -2430,7 +2473,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
// after another.
SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale};
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
- DL, OpsHi, MMO);
+ DL, OpsHi, MMO, N->getIndexType());
}
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -2596,7 +2639,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes);
- return PromoteTargetBoolean(Con, N->getValueType(0));
+
+ EVT OpVT = N->getOperand(0).getValueType();
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+ return DAG.getNode(ExtendCode, DL, N->getValueType(0), Con);
}
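
The PromoteTargetBoolean calls here and in WidenVecOp_SETCC below are replaced by an explicit extend whose opcode tracks the target's boolean encoding. To my reading, the mapping getExtendForContent applies is the following sketch:

    #include "llvm/CodeGen/ISDOpcodes.h"
    #include "llvm/CodeGen/TargetLowering.h"
    using namespace llvm;

    // How a vector-boolean setcc result is extended to the final VT.
    ISD::NodeType extendFor(TargetLowering::BooleanContent BC) {
      switch (BC) {
      case TargetLowering::UndefinedBooleanContent:
        return ISD::ANY_EXTEND;  // only bit 0 is meaningful
      case TargetLowering::ZeroOrOneBooleanContent:
        return ISD::ZERO_EXTEND; // booleans are 0 or +1
      case TargetLowering::ZeroOrNegativeOneBooleanContent:
        return ISD::SIGN_EXTEND; // booleans are 0 or -1 (all ones)
      }
      llvm_unreachable("unknown BooleanContent");
    }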
@@ -2663,7 +2710,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
- case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break;
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
@@ -2719,6 +2765,15 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_BinaryCanTrap(N);
break;
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
+ // These are binary operations, but with an extra operand that shouldn't
+ // be widened (the scale).
+ Res = WidenVecRes_BinaryWithExtraScalarOp(N);
+ break;
+
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
@@ -2790,6 +2845,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FP_EXTEND:
case ISD::STRICT_FP_ROUND:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
Res = WidenVecRes_Convert_StrictFP(N);
break;
@@ -2866,6 +2923,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags());
}
+SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) {
+ // Binary op widening, but with an extra operand that shouldn't be widened.
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp3 = N->getOperand(2);
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3,
+ N->getFlags());
+}
+
// Given a vector of operations that have been broken up to widen, see
// if we can collect them together into the next widest legal VT. This
// implementation is trap-safe.
@@ -3716,7 +3784,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
Scale };
SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
- N->getMemOperand());
+ N->getMemOperand(), N->getIndexType());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
@@ -4094,7 +4162,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_EXTEND:
case ISD::STRICT_FP_EXTEND:
case ISD::FP_TO_SINT:
+ case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::TRUNCATE:
@@ -4434,7 +4504,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) {
SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index,
Scale};
SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops,
- MG->getMemOperand());
+ MG->getMemOperand(), MG->getIndexType());
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
return SDValue();
@@ -4472,7 +4542,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
Scale};
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
MSC->getMemoryVT(), SDLoc(N), Ops,
- MSC->getMemOperand());
+ MSC->getMemOperand(), MSC->getIndexType());
}
SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
@@ -4504,7 +4574,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- return PromoteTargetBoolean(CC, VT);
+ EVT OpVT = N->getOperand(0).getValueType();
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+ return DAG.getNode(ExtendCode, dl, VT, CC);
}
SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
@@ -4706,7 +4779,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth;
- unsigned LdAlign = LD->isVolatile() ? 0 : Align; // Allow wider loads.
+ unsigned LdAlign = (!LD->isSimple()) ? 0 : Align; // Allow wider loads.
// Find the vector type that can load from.
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 2cb850fa1a3d..7ee44c808fcb 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -498,7 +498,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
// Check for def of register or earlyclobber register.
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
} else
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 34b4c8502353..ff806bdb822c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1188,6 +1188,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
if (!Pred.isArtificial())
AddPredQueued(NewSU, Pred);
+ // Make sure the clone comes after the original. (InstrEmitter assumes
+ // this ordering.)
+ AddPredQueued(NewSU, SDep(SU, SDep::Artificial));
+
// Only copy scheduled successors. Cut them from old node's successor
// list and move them over.
SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
@@ -1374,7 +1378,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
// Check for def of register or earlyclobber register.
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
}
} else
@@ -2358,7 +2362,7 @@ static bool hasOnlyLiveInOpers(const SUnit *SU) {
PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
unsigned Reg =
cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
RetVal = true;
continue;
}
@@ -2379,7 +2383,7 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) {
if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
unsigned Reg =
cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
RetVal = true;
continue;
}
@@ -2948,8 +2952,8 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
// like other nodes from the perspective of scheduling heuristics.
if (SDNode *N = SU.getNode())
if (N->getOpcode() == ISD::CopyToReg &&
- TargetRegisterInfo::isVirtualRegister
- (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ Register::isVirtualRegister(
+ cast<RegisterSDNode>(N->getOperand(1))->getReg()))
continue;
SDNode *PredFrameSetup = nullptr;
@@ -2995,8 +2999,8 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
// like other nodes from the perspective of scheduling heuristics.
if (SDNode *N = SU.getNode())
if (N->getOpcode() == ISD::CopyFromReg &&
- TargetRegisterInfo::isVirtualRegister
- (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ Register::isVirtualRegister(
+ cast<RegisterSDNode>(N->getOperand(1))->getReg()))
continue;
// Perform checks on the successors of PredSU.
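
The repeated substitution throughout the scheduler files replaces the static TargetRegisterInfo helpers with their equivalents on the new llvm::Register wrapper; a minimal sketch of the new spelling (same predicates, new home):

    #include "llvm/CodeGen/Register.h"
    bool isVirt(unsigned Reg) { return llvm::Register::isVirtualRegister(Reg); }
    bool isPhys(unsigned Reg) { return llvm::Register::isPhysicalRegister(Reg); }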
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 568c6191e512..d4c1fb36475e 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -115,7 +115,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
return;
unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
return;
unsigned ResNo = User->getOperand(2).getResNo();
@@ -528,7 +528,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// glued together nodes with a single SUnit.
-void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
+void ScheduleDAGSDNodes::BuildSchedGraph(AAResults *AA) {
// Cluster certain nodes which should be scheduled together.
ClusterNodes();
// Populate the SUnits array.
@@ -656,7 +656,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
!BB->succ_empty()) {
unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
// This copy is a liveout value. It is likely coalesced, so reduce the
// latency so not to penalize the def.
// FIXME: need target specific adjustment here?
@@ -808,7 +808,7 @@ EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
} else {
// Copy from physical register.
assert(I->getReg() && "Unknown physical register!");
- unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+ Register VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
(void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
@@ -909,6 +909,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
// Remember the source order of the inserted instruction.
if (HasDbg)
ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn);
+
+ if (MDNode *MD = DAG->getHeapAllocSite(N)) {
+ if (NewInsn && NewInsn->isCall())
+ MF.addCodeViewHeapAllocSite(NewInsn, MD);
+ }
+
GluedNodes.pop_back();
}
auto NewInsn =
@@ -917,6 +923,10 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
if (HasDbg)
ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen,
NewInsn);
+ if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) {
+ if (NewInsn && NewInsn->isCall())
+ MF.addCodeViewHeapAllocSite(NewInsn, MD);
+ }
}
// Insert all the dbg_values which have not already been inserted in source
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 5163b4fa4fd3..183ce4b0652d 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -26,6 +26,7 @@
namespace llvm {
+class AAResults;
class InstrItineraryData;
/// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
@@ -93,7 +94,7 @@ class InstrItineraryData;
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// flagged together nodes with a single SUnit.
- void BuildSchedGraph(AliasAnalysis *AA);
+ void BuildSchedGraph(AAResults *AA);
/// InitNumRegDefsLeft - Determine the # of regs defined by this node.
///
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index ab06b55b49fd..e7bac73678a7 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -63,14 +63,13 @@ private:
/// HazardRec - The hazard recognizer to use.
ScheduleHazardRecognizer *HazardRec;
- /// AA - AliasAnalysis for making memory reference queries.
- AliasAnalysis *AA;
+ /// AA - AAResults for making memory reference queries.
+ AAResults *AA;
public:
- ScheduleDAGVLIW(MachineFunction &mf,
- AliasAnalysis *aa,
+ ScheduleDAGVLIW(MachineFunction &mf, AAResults *aa,
SchedulingPriorityQueue *availqueue)
- : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
+ : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
const TargetSubtargetInfo &STI = mf.getSubtarget();
HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 5852e693fa9f..52a71b91d93f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -859,9 +859,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
break;
case ISD::TargetExternalSymbol: {
ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N);
- Erased = TargetExternalSymbols.erase(
- std::pair<std::string,unsigned char>(ESN->getSymbol(),
- ESN->getTargetFlags()));
+ Erased = TargetExternalSymbols.erase(std::pair<std::string, unsigned>(
+ ESN->getSymbol(), ESN->getTargetFlags()));
break;
}
case ISD::MCSymbol: {
@@ -1084,6 +1083,7 @@ void SelectionDAG::clear() {
ExternalSymbols.clear();
TargetExternalSymbols.clear();
MCSymbols.clear();
+ SDCallSiteDbgInfo.clear();
std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
static_cast<CondCodeSDNode*>(nullptr));
std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
@@ -1353,7 +1353,7 @@ SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT,
SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL,
EVT VT, int64_t Offset, bool isTargetGA,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
assert((TargetFlags == 0 || isTargetGA) &&
"Cannot set target flags on target-independent globals");
@@ -1400,7 +1400,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
}
SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent jump tables");
unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
@@ -1421,7 +1421,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
unsigned Alignment, int Offset,
bool isTarget,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
@@ -1449,7 +1449,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
unsigned Alignment, int Offset,
bool isTarget,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
@@ -1473,7 +1473,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
}
SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None);
ID.AddInteger(Index);
@@ -1535,10 +1535,9 @@ SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) {
}
SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
SDNode *&N =
- TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym,
- TargetFlags)];
+ TargetExternalSymbols[std::pair<std::string, unsigned>(Sym, TargetFlags)];
if (N) return SDValue(N, 0);
N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, VT);
InsertNode(N);
@@ -1802,9 +1801,8 @@ SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl,
}
SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
- int64_t Offset,
- bool isTarget,
- unsigned char TargetFlags) {
+ int64_t Offset, bool isTarget,
+ unsigned TargetFlags) {
unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
FoldingSetNodeID ID;
@@ -1900,20 +1898,19 @@ SDValue SelectionDAG::expandVAArg(SDNode *Node) {
EVT VT = Node->getValueType(0);
SDValue Tmp1 = Node->getOperand(0);
SDValue Tmp2 = Node->getOperand(1);
- unsigned Align = Node->getConstantOperandVal(3);
+ const MaybeAlign MA(Node->getConstantOperandVal(3));
SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1,
Tmp2, MachinePointerInfo(V));
SDValue VAList = VAListLoad;
- if (Align > TLI.getMinStackArgumentAlignment()) {
- assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
-
+ if (MA && *MA > TLI.getMinStackArgumentAlignment()) {
VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
- getConstant(Align - 1, dl, VAList.getValueType()));
+ getConstant(MA->value() - 1, dl, VAList.getValueType()));
- VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList,
- getConstant(-(int64_t)Align, dl, VAList.getValueType()));
+ VAList =
+ getNode(ISD::AND, dl, VAList.getValueType(), VAList,
+ getConstant(-(int64_t)MA->value(), dl, VAList.getValueType()));
}
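
The ADD/AND pair built above is the standard align-up idiom, now guarded by MaybeAlign so a missing alignment skips the rounding entirely. A scalar sketch of the computation:

    #include <cstdint>
    // Round P up to a multiple of the power-of-two alignment A.
    uint64_t alignUp(uint64_t P, uint64_t A) {
      return (P + A - 1) & ~(A - 1); // identical to (P + A - 1) & -A
    }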
// Increment the pointer, VAList, to the next vaarg
@@ -2154,12 +2151,9 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
}
case ISD::OR:
case ISD::XOR:
- // If the LHS or RHS don't contribute bits to the or, drop them.
- if (MaskedValueIsZero(V.getOperand(0), DemandedBits))
- return V.getOperand(1);
- if (MaskedValueIsZero(V.getOperand(1), DemandedBits))
- return V.getOperand(0);
- break;
+ case ISD::SIGN_EXTEND_INREG:
+ return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts,
+ *this, 0);
case ISD::SRL:
// Only look at single-use SRLs.
if (!V.getNode()->hasOneUse())
@@ -2203,15 +2197,6 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc);
break;
}
- case ISD::SIGN_EXTEND_INREG:
- EVT ExVT = cast<VTSDNode>(V.getOperand(1))->getVT();
- unsigned ExVTBits = ExVT.getScalarSizeInBits();
-
- // If none of the extended bits are demanded, eliminate the sextinreg.
- if (DemandedBits.getActiveBits() <= ExVTBits)
- return V.getOperand(0);
-
- break;
}
return SDValue();
}
@@ -2395,15 +2380,39 @@ SDValue SelectionDAG::getSplatValue(SDValue V) {
/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
/// is less than the element bit-width of the shift node, return it.
static const APInt *getValidShiftAmountConstant(SDValue V) {
+ unsigned BitWidth = V.getScalarValueSizeInBits();
if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) {
// Shifting more than the bitwidth is not valid.
const APInt &ShAmt = SA->getAPIntValue();
- if (ShAmt.ult(V.getScalarValueSizeInBits()))
+ if (ShAmt.ult(BitWidth))
return &ShAmt;
}
return nullptr;
}
+/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less
+/// than the element bit-width of the shift node, return the minimum value.
+static const APInt *getValidMinimumShiftAmountConstant(SDValue V) {
+ unsigned BitWidth = V.getScalarValueSizeInBits();
+ auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1));
+ if (!BV)
+ return nullptr;
+ const APInt *MinShAmt = nullptr;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i));
+ if (!SA)
+ return nullptr;
+ // Shifting more than the bitwidth is not valid.
+ const APInt &ShAmt = SA->getAPIntValue();
+ if (ShAmt.uge(BitWidth))
+ return nullptr;
+ if (MinShAmt && MinShAmt->ule(ShAmt))
+ continue;
+ MinShAmt = &ShAmt;
+ }
+ return MinShAmt;
+}
+
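
A small worked check of how the SRL case further down uses the new helper (standalone, assuming LLVM's KnownBits): for shift amounts {4, 7} on i32 lanes the minimum is 4, so four high bits of every lane are known zero.

    #include "llvm/Support/KnownBits.h"
    using namespace llvm;

    void minShiftKnownZero() {
      KnownBits Known(32);
      unsigned MinShAmt = 4;            // min of {4, 7}, both < 32
      Known.Zero.setHighBits(MinShAmt); // lanes have >= 4 leading zeros
    }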
/// Determine which bits of Op are known to be either zero or one and return
/// them in Known. For vectors, the known bits are those that are shared by
/// every vector element.
@@ -2437,7 +2446,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
return Known;
}
- if (Depth == 6)
+ if (Depth >= MaxRecursionDepth)
return Known; // Limit search depth.
KnownBits Known2;
@@ -2582,14 +2591,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
SDValue Src = Op.getOperand(0);
ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts);
if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
// Offset the demanded elts by the subvector index.
uint64_t Idx = SubIdx->getZExtValue();
- APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
- Known = computeKnownBits(Src, DemandedSrc, Depth + 1);
- } else {
- Known = computeKnownBits(Src, Depth + 1);
+ DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
}
+ Known = computeKnownBits(Src, DemandedSrc, Depth + 1);
break;
}
case ISD::SCALAR_TO_VECTOR: {
@@ -2800,25 +2808,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.One.lshrInPlace(Shift);
// High bits are known zero.
Known.Zero.setHighBits(Shift);
- } else if (auto *BV = dyn_cast<BuildVectorSDNode>(Op.getOperand(1))) {
- // If the shift amount is a vector of constants see if we can bound
- // the number of upper zero bits.
- unsigned ShiftAmountMin = BitWidth;
- for (unsigned i = 0; i != BV->getNumOperands(); ++i) {
- if (auto *C = dyn_cast<ConstantSDNode>(BV->getOperand(i))) {
- const APInt &ShAmt = C->getAPIntValue();
- if (ShAmt.ult(BitWidth)) {
- ShiftAmountMin = std::min<unsigned>(ShiftAmountMin,
- ShAmt.getZExtValue());
- continue;
- }
- }
- // Don't know anything.
- ShiftAmountMin = 0;
- break;
- }
-
- Known.Zero.setHighBits(ShiftAmountMin);
+ } else if (const APInt *ShMinAmt = getValidMinimumShiftAmountConstant(Op)) {
+ // Minimum shift high bits are known zero.
+ Known.Zero.setHighBits(ShMinAmt->getZExtValue());
}
break;
case ISD::SRA:
@@ -3105,12 +3097,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
// If the first operand is non-negative or has all low bits zero, then
// the upper bits are all zero.
- if (Known2.Zero[BitWidth-1] || ((Known2.Zero & LowBits) == LowBits))
+ if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero))
Known.Zero |= ~LowBits;
// If the first operand is negative and not all low bits are zero, then
// the upper bits are all one.
- if (Known2.One[BitWidth-1] && ((Known2.One & LowBits) != 0))
+ if (Known2.isNegative() && LowBits.intersects(Known2.One))
Known.One |= ~LowBits;
assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
}
@@ -3427,7 +3419,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return Val.getNumSignBits();
}
- if (Depth == 6)
+ if (Depth >= MaxRecursionDepth)
return 1; // Limit search depth.
if (!DemandedElts)
@@ -3729,6 +3721,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp == 1) return 1; // Early out.
return std::min(Tmp, Tmp2)-1;
+ case ISD::MUL: {
+ // The output of the Mul can be at most twice the valid bits in the inputs.
+ unsigned SignBitsOp0 = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (SignBitsOp0 == 1)
+ break;
+ unsigned SignBitsOp1 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+ if (SignBitsOp1 == 1)
+ break;
+ unsigned OutValidBits =
+ (VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1);
+ return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1;
+ }
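
A quick check of the arithmetic in the new ISD::MUL case (a standalone restatement, not the committed code):

    unsigned mulSignBits(unsigned VTBits, unsigned S0, unsigned S1) {
      unsigned OutValidBits = (VTBits - S0 + 1) + (VTBits - S1 + 1);
      return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1;
    }
    // mulSignBits(32, 25, 25) == 17: operands that fit in 8 signed bits
    // produce a product that fits in 16 bits, leaving 17 sign bits.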
case ISD::TRUNCATE: {
// Check if the sign bits of source go down as far as the truncated value.
unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits();
@@ -3817,13 +3821,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
SDValue Src = Op.getOperand(0);
ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts);
if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
// Offset the demanded elts by the subvector index.
uint64_t Idx = SubIdx->getZExtValue();
- APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
- return ComputeNumSignBits(Src, DemandedSrc, Depth + 1);
+ DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
}
- return ComputeNumSignBits(Src, Depth + 1);
+ return ComputeNumSignBits(Src, DemandedSrc, Depth + 1);
}
case ISD::CONCAT_VECTORS: {
// Determine the minimum number of sign bits across all demanded
@@ -3976,7 +3980,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
return true;
- if (Depth == 6)
+ if (Depth >= MaxRecursionDepth)
return false; // Limit search depth.
// TODO: Handle vectors.
@@ -4645,7 +4649,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getUNDEF(VT);
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
- if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) &&
+ if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
OpOpcode == ISD::FSUB)
return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1),
Operand.getOperand(0), Flags);
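
The gating change here is deliberate: rewriting -(X-Y) as (Y-X) only goes wrong for signed zeros (when X == Y), so NoSignedZerosFPMath, rather than the much broader UnsafeFPMath, is the precise predicate. A sketch of the one case the fold would miscompile:

#include <cassert>
#include <cmath>

// Illustrative: -(X-Y) and (Y-X) differ only in the sign of zero.
int main() {
  double X = 1.0, Y = 1.0;
  double A = -(X - Y); // -0.0
  double B = Y - X;    // +0.0
  assert(A == B);                             // compare equal...
  assert(std::signbit(A) != std::signbit(B)); // ...but signs differ
  return 0;
}
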
@@ -5156,22 +5160,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (N2C && N2C->isNullValue())
return N1;
break;
- case ISD::FP_ROUND_INREG: {
- EVT EVT = cast<VTSDNode>(N2)->getVT();
- assert(VT == N1.getValueType() && "Not an inreg round!");
- assert(VT.isFloatingPoint() && EVT.isFloatingPoint() &&
- "Cannot FP_ROUND_INREG integer types");
- assert(EVT.isVector() == VT.isVector() &&
- "FP_ROUND_INREG type should be vector iff the operand "
- "type is vector!");
- assert((!EVT.isVector() ||
- EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
- "Vector element counts must match in FP_ROUND_INREG");
- assert(EVT.bitsLE(VT) && "Not rounding down!");
- (void)EVT;
- if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding.
- break;
- }
case ISD::FP_ROUND:
assert(VT.isFloatingPoint() &&
N1.getValueType().isFloatingPoint() &&
@@ -5382,7 +5370,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
std::swap(N1, N2);
} else {
switch (Opcode) {
- case ISD::FP_ROUND_INREG:
case ISD::SIGN_EXTEND_INREG:
case ISD::SUB:
return getUNDEF(VT); // fold op(undef, arg2) -> undef
@@ -5770,7 +5757,7 @@ static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
- uint64_t Size, unsigned Align,
+ uint64_t Size, unsigned Alignment,
bool isVol, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
@@ -5795,15 +5782,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
- if (Align > SrcAlign)
- SrcAlign = Align;
+ if (Alignment > SrcAlign)
+ SrcAlign = Alignment;
ConstantDataArraySlice Slice;
bool CopyFromConstant = isMemSrcFromConstant(Src, Slice);
bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
if (!TLI.findOptimalMemOpLowering(
- MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align),
+ MemOps, Limit, Size, (DstAlignCanChange ? 0 : Alignment),
(isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false,
/*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant,
/*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(),
@@ -5818,15 +5805,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
// realignment.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->needsStackRealignment(MF))
- while (NewAlign > Align &&
- DL.exceedsNaturalStackAlignment(NewAlign))
- NewAlign /= 2;
+ while (NewAlign > Alignment &&
+ DL.exceedsNaturalStackAlignment(Align(NewAlign)))
+ NewAlign /= 2;
- if (NewAlign > Align) {
+ if (NewAlign > Alignment) {
// Give the stack frame object a larger alignment if needed.
if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
MFI.setObjectAlignment(FI->getIndex(), NewAlign);
- Align = NewAlign;
+ Alignment = NewAlign;
}
}
@@ -5869,10 +5856,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
}
Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice);
if (Value.getNode()) {
- Store = DAG.getStore(Chain, dl, Value,
- DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), Align,
- MMOFlags);
+ Store = DAG.getStore(
+ Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
+ DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags);
OutChains.push_back(Store);
}
}
@@ -5900,7 +5886,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
Store = DAG.getTruncStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags);
+ DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags);
OutStoreChains.push_back(Store);
}
SrcOff += VTSize;
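
In the realignment guard above, the candidate alignment is halved until it no longer exceeds the natural stack alignment (the query now takes the strong Align type). A standalone sketch of that policy, with a hypothetical natural stack alignment:

#include <cassert>

// Illustrative: shrink a requested alignment until it does not exceed
// a (hypothetical) natural stack alignment, as the memcpy lowering
// does when stack realignment is unavailable.
unsigned clampToStackAlign(unsigned NewAlign, unsigned CurAlign,
                           unsigned NaturalStackAlign) {
  while (NewAlign > CurAlign && NewAlign > NaturalStackAlign)
    NewAlign /= 2;
  return NewAlign;
}

int main() {
  // Requested 64-byte alignment, current 4, natural stack align 16:
  // halve 64 -> 32 -> 16, which is acceptable.
  assert(clampToStackAlign(64, 4, 16) == 16);
  // Already no larger than the current alignment: left untouched.
  assert(clampToStackAlign(8, 16, 16) == 8);
  return 0;
}
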
@@ -6567,7 +6553,7 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) {
SDValue SelectionDAG::getMemIntrinsicNode(
unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align,
- MachineMemOperand::Flags Flags, unsigned Size, const AAMDNodes &AAInfo) {
+ MachineMemOperand::Flags Flags, uint64_t Size, const AAMDNodes &AAInfo) {
if (Align == 0) // Ensure that codegen never sees alignment 0
Align = getEVTAlignment(MemVT);
@@ -6619,7 +6605,9 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
createOperands(N, Ops);
}
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl,
@@ -7022,14 +7010,15 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
- MachineMemOperand *MMO) {
+ MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType) {
assert(Ops.size() == 6 && "Incompatible number of operands");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>(
- dl.getIROrder(), VTs, VT, MMO));
+ dl.getIROrder(), VTs, VT, MMO, IndexType));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -7038,7 +7027,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
}
auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(),
- VTs, VT, MMO);
+ VTs, VT, MMO, IndexType);
createOperands(N, Ops);
assert(N->getPassThru().getValueType() == N->getValueType(0) &&
@@ -7062,14 +7051,15 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
- MachineMemOperand *MMO) {
+ MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType) {
assert(Ops.size() == 6 && "Incompatible number of operands");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>(
- dl.getIROrder(), VTs, VT, MMO));
+ dl.getIROrder(), VTs, VT, MMO, IndexType));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -7077,7 +7067,7 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
return SDValue(E, 0);
}
auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(),
- VTs, VT, MMO);
+ VTs, VT, MMO, IndexType);
createOperands(N, Ops);
assert(N->getMask().getValueType().getVectorNumElements() ==
@@ -7766,16 +7756,22 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break;
case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break;
case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break;
+ case ISD::STRICT_LRINT: NewOpc = ISD::LRINT; break;
+ case ISD::STRICT_LLRINT: NewOpc = ISD::LLRINT; break;
case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break;
case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break;
case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break;
case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break;
case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break;
case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break;
+ case ISD::STRICT_LROUND: NewOpc = ISD::LROUND; break;
+ case ISD::STRICT_LLROUND: NewOpc = ISD::LLROUND; break;
case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break;
case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break;
case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break;
case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break;
+ case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break;
+ case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break;
}
assert(Node->getNumValues() == 2 && "Unexpected number of results!");
@@ -7925,6 +7921,7 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL,
CSEMap.InsertNode(N, IP);
InsertNode(N);
+ NewSDValueDbgMsg(SDValue(N, 0), "Creating new machine node: ", this);
return N;
}
@@ -8619,7 +8616,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
// TokenFactor.
SDValue OldChain = SDValue(OldLoad, 1);
SDValue NewChain = SDValue(NewMemOp.getNode(), 1);
- if (!OldLoad->hasAnyUseOfValue(1))
+ if (OldChain == NewChain || !OldLoad->hasAnyUseOfValue(1))
return NewChain;
SDValue TokenFactor =
@@ -8812,7 +8809,7 @@ HandleSDNode::~HandleSDNode() {
GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order,
const DebugLoc &DL,
const GlobalValue *GA, EVT VT,
- int64_t o, unsigned char TF)
+ int64_t o, unsigned TF)
: SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) {
TheGlobal = GA;
}
@@ -8986,7 +8983,7 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
// Loads don't have side effects, look through them.
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) {
- if (!Ld->isVolatile())
+ if (Ld->isUnordered())
return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1);
}
return false;
@@ -9005,21 +9002,51 @@ void SDNode::intersectFlagsWith(const SDNodeFlags Flags) {
SDValue
SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
- ArrayRef<ISD::NodeType> CandidateBinOps) {
+ ArrayRef<ISD::NodeType> CandidateBinOps,
+ bool AllowPartials) {
// The pattern must end in an extract from index 0.
if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isNullConstant(Extract->getOperand(1)))
return SDValue();
- SDValue Op = Extract->getOperand(0);
- unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements());
-
// Match against one of the candidate binary ops.
+ SDValue Op = Extract->getOperand(0);
if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) {
return Op.getOpcode() == unsigned(BinOp);
}))
return SDValue();
+ // Floating-point reductions may require relaxed constraints on the final step
+ // of the reduction because they may reorder intermediate operations.
+ unsigned CandidateBinOp = Op.getOpcode();
+ if (Op.getValueType().isFloatingPoint()) {
+ SDNodeFlags Flags = Op->getFlags();
+ switch (CandidateBinOp) {
+ case ISD::FADD:
+ if (!Flags.hasNoSignedZeros() || !Flags.hasAllowReassociation())
+ return SDValue();
+ break;
+ default:
+ llvm_unreachable("Unhandled FP opcode for binop reduction");
+ }
+ }
+
+ // Matching failed - attempt to see if we did enough stages that a partial
+ // reduction from a subvector is possible.
+ auto PartialReduction = [&](SDValue Op, unsigned NumSubElts) {
+ if (!AllowPartials || !Op)
+ return SDValue();
+ EVT OpVT = Op.getValueType();
+ EVT OpSVT = OpVT.getScalarType();
+ EVT SubVT = EVT::getVectorVT(*getContext(), OpSVT, NumSubElts);
+ if (!TLI->isExtractSubvectorCheap(SubVT, OpVT, 0))
+ return SDValue();
+ BinOp = (ISD::NodeType)CandidateBinOp;
+ return getNode(
+ ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op,
+ getConstant(0, SDLoc(Op), TLI->getVectorIdxTy(getDataLayout())));
+ };
+
// At each stage, we're looking for something that looks like:
// %s = shufflevector <8 x i32> %op, <8 x i32> undef,
// <8 x i32> <i32 2, i32 3, i32 undef, i32 undef,
@@ -9030,10 +9057,16 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
// <4,5,6,7,u,u,u,u>
// <2,3,u,u,u,u,u,u>
// <1,u,u,u,u,u,u,u>
- unsigned CandidateBinOp = Op.getOpcode();
+ // While a partial reduction match would be:
+ // <2,3,u,u,u,u,u,u>
+ // <1,u,u,u,u,u,u,u>
+ unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements());
+ SDValue PrevOp;
for (unsigned i = 0; i < Stages; ++i) {
+ unsigned MaskEnd = (1 << i);
+
if (Op.getOpcode() != CandidateBinOp)
- return SDValue();
+ return PartialReduction(PrevOp, MaskEnd);
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
@@ -9049,12 +9082,14 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
// The first operand of the shuffle should be the same as the other operand
// of the binop.
if (!Shuffle || Shuffle->getOperand(0) != Op)
- return SDValue();
+ return PartialReduction(PrevOp, MaskEnd);
// Verify the shuffle has the expected (at this stage of the pyramid) mask.
- for (int Index = 0, MaskEnd = 1 << i; Index < MaskEnd; ++Index)
- if (Shuffle->getMaskElt(Index) != MaskEnd + Index)
- return SDValue();
+ for (int Index = 0; Index < (int)MaskEnd; ++Index)
+ if (Shuffle->getMaskElt(Index) != (int)(MaskEnd + Index))
+ return PartialReduction(PrevOp, MaskEnd);
+
+ PrevOp = Op;
}
BinOp = (ISD::NodeType)CandidateBinOp;
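
The pyramid this matcher walks halves the vector at each stage: shuffle the upper half down, apply the binop, repeat, then extract lane 0. With AllowPartials, a match that breaks off after i stages now yields a subvector reduction instead of failing outright. A scalar simulation of the full pattern on hypothetical data:

#include <cassert>

// Illustrative: the shuffle/binop "pyramid" matchBinOpReduction
// recognizes, simulated on a plain array. Each stage adds the upper
// half onto the lower half; lane 0 ends up holding the full sum.
int main() {
  int V[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  for (unsigned Width = 8; Width > 1; Width /= 2)
    for (unsigned i = 0; i != Width / 2; ++i)
      V[i] += V[i + Width / 2]; // shuffle upper half down, then add
  assert(V[0] == 36); // extract element 0: 1 + 2 + ... + 8
  return 0;
}
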
@@ -9114,8 +9149,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
getShiftAmountOperand(Operands[0].getValueType(),
Operands[1])));
break;
- case ISD::SIGN_EXTEND_INREG:
- case ISD::FP_ROUND_INREG: {
+ case ISD::SIGN_EXTEND_INREG: {
EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType();
Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
Operands[0],
@@ -9187,6 +9221,9 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
int Dist) const {
if (LD->isVolatile() || Base->isVolatile())
return false;
+ // TODO: probably too restrictive for atomics, revisit
+ if (!LD->isSimple())
+ return false;
if (LD->isIndexed() || Base->isIndexed())
return false;
if (LD->getChain() != Base->getChain())
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 9592bc30a4e1..3a53ab9717a4 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
#include <cstdint>
using namespace llvm;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e818dd27c05e..8c15563fcd23 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -833,7 +833,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
// If the source register was virtual and if we know something about it,
// add an assert node.
- if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
+ if (!Register::isVirtualRegister(Regs[Part + i]) ||
!RegisterVT.isInteger())
continue;
@@ -948,8 +948,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
if (HasMatching)
Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
- else if (!Regs.empty() &&
- TargetRegisterInfo::isVirtualRegister(Regs.front())) {
+ else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) {
// Put the register class of the virtual registers in the flag word. That
// way, later passes can recompute register class constraints for inline
// assembly as well as normal instructions.
@@ -1810,7 +1809,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
// offsets to its parts don't wrap either.
SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);
- SDValue Val = RetOp.getValue(i);
+ SDValue Val = RetOp.getValue(RetOp.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val,
@@ -2263,7 +2262,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
Instruction::BinaryOps Opcode = BOp->getOpcode();
if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
- !I.getMetadata(LLVMContext::MD_unpredictable) &&
+ !I.hasMetadata(LLVMContext::MD_unpredictable) &&
(Opcode == Instruction::And || Opcode == Instruction::Or)) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
Opcode,
@@ -2600,9 +2599,11 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
void
SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setDiscardResult(true);
SDValue Chain =
TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
- None, false, getCurSDLoc(), false, false).second;
+ None, CallOptions, getCurSDLoc()).second;
// On PS4, the "return address" must still be within the calling function,
// even if it's at the very end, so emit an explicit TRAP here.
// Passing 'true' for doesNotReturn above won't generate the trap for us.
@@ -2618,24 +2619,18 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MachineBasicBlock *SwitchBB) {
SDLoc dl = getCurSDLoc();
- // Subtract the minimum value
+ // Subtract the minimum value.
SDValue SwitchOp = getValue(B.SValue);
EVT VT = SwitchOp.getValueType();
- SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
- DAG.getConstant(B.First, dl, VT));
-
- // Check range
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDValue RangeCmp = DAG.getSetCC(
- dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
- Sub.getValueType()),
- Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT);
+ SDValue RangeSub =
+ DAG.getNode(ISD::SUB, dl, VT, SwitchOp, DAG.getConstant(B.First, dl, VT));
// Determine the type of the test operands.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
bool UsePtrType = false;
- if (!TLI.isTypeLegal(VT))
+ if (!TLI.isTypeLegal(VT)) {
UsePtrType = true;
- else {
+ } else {
for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
// Switch table case range are encoded into series of masks.
@@ -2644,6 +2639,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
break;
}
}
+ SDValue Sub = RangeSub;
if (UsePtrType) {
VT = TLI.getPointerTy(DAG.getDataLayout());
Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
@@ -2655,20 +2651,29 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
- addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
+ if (!B.OmitRangeCheck)
+ addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
addSuccessorWithProb(SwitchBB, MBB, B.Prob);
SwitchBB->normalizeSuccProbs();
- SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
- MVT::Other, CopyTo, RangeCmp,
- DAG.getBasicBlock(B.Default));
+ SDValue Root = CopyTo;
+ if (!B.OmitRangeCheck) {
+ // Conditional branch to the default block.
+ SDValue RangeCmp = DAG.getSetCC(dl,
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ RangeSub.getValueType()),
+ RangeSub, DAG.getConstant(B.Range, dl, RangeSub.getValueType()),
+ ISD::SETUGT);
+
+ Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp,
+ DAG.getBasicBlock(B.Default));
+ }
// Avoid emitting unnecessary branches to the next block.
if (MBB != NextBlock(SwitchBB))
- BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange,
- DAG.getBasicBlock(MBB));
+ Root = DAG.getNode(ISD::BR, dl, MVT::Other, Root, DAG.getBasicBlock(MBB));
- DAG.setRoot(BrRange);
+ DAG.setRoot(Root);
}
/// visitBitTestCase - this function produces one "bit test"
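
The range check this hunk makes optional (via OmitRangeCheck, set later when the fallthrough is unreachable) is the classic subtract-and-compare trick: after subtracting the smallest case value, a single unsigned SETUGT catches inputs both below and above the case range. A small model:

#include <cassert>
#include <cstdint>

// Illustrative: the switch range check emitted by visitBitTestHeader.
// Unsigned wraparound makes one compare handle both out-of-range sides.
bool inRange(uint32_t X, uint32_t First, uint32_t Range) {
  return (X - First) <= Range; // X < First wraps to a huge value
}

int main() {
  // Cases 10..14: First = 10, Range = 4.
  assert(inRange(12, 10, 4));
  assert(!inRange(9, 10, 4)); // 9 - 10 wraps to 0xFFFFFFFF
  assert(!inRange(15, 10, 4));
  return 0;
}
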
@@ -3266,8 +3271,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
// We care about the legality of the operation after it has been type
// legalized.
- while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&
- VT != TLI.getTypeToTransformTo(Ctx, VT))
+ while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal)
VT = TLI.getTypeToTransformTo(Ctx, VT);
// If the vselect is legal, assume we want to leave this as a vector setcc +
@@ -3534,17 +3538,32 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) {
void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1));
+ Constant *MaskV = cast<Constant>(I.getOperand(2));
SDLoc DL = getCurSDLoc();
-
- SmallVector<int, 8> Mask;
- ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
- unsigned MaskNumElts = Mask.size();
-
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
EVT SrcVT = Src1.getValueType();
unsigned SrcNumElts = SrcVT.getVectorNumElements();
+ if (MaskV->isNullValue() && VT.isScalableVector()) {
+ // Canonical splat form of first element of first input vector.
+ SDValue FirstElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ SrcVT.getScalarType(), Src1,
+ DAG.getConstant(0, DL,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+ setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt));
+ return;
+ }
+
+ // For now, we only handle splats for scalable vectors.
+ // The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation
+ // for targets that support a SPLAT_VECTOR for non-scalable vector types.
+ assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle");
+
+ SmallVector<int, 8> Mask;
+ ShuffleVectorInst::getShuffleMask(MaskV, Mask);
+ unsigned MaskNumElts = Mask.size();
+
if (SrcNumElts == MaskNumElts) {
setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
return;
@@ -3825,7 +3844,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// Normalize Vector GEP - all scalar operands should be converted to the
// splat vector.
unsigned VectorWidth = I.getType()->isVectorTy() ?
- cast<VectorType>(I.getType())->getVectorNumElements() : 0;
+ I.getType()->getVectorNumElements() : 0;
if (VectorWidth && !N.getValueType().isVector()) {
LLVMContext &Context = *DAG.getContext();
@@ -3858,12 +3877,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// If this is a scalar constant or a splat vector of constants,
// handle it quickly.
- const auto *CI = dyn_cast<ConstantInt>(Idx);
- if (!CI && isa<ConstantDataVector>(Idx) &&
- cast<ConstantDataVector>(Idx)->getSplatValue())
- CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue());
+ const auto *C = dyn_cast<Constant>(Idx);
+ if (C && isa<VectorType>(C->getType()))
+ C = C->getSplatValue();
- if (CI) {
+ if (const auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
if (CI->isZero())
continue;
APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize);
@@ -3872,7 +3890,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) :
DAG.getConstant(Offs, dl, IdxTy);
- // In an inbouds GEP with an offset that is nonnegative even when
+ // In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
SDNodeFlags Flags;
if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
@@ -4002,8 +4020,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Type *Ty = I.getType();
bool isVolatile = I.isVolatile();
- bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
- bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
+ bool isNonTemporal = I.hasMetadata(LLVMContext::MD_nontemporal);
+ bool isInvariant = I.hasMetadata(LLVMContext::MD_invariant_load);
bool isDereferenceable =
isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout());
unsigned Alignment = I.getAlignment();
@@ -4118,7 +4136,7 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
SDValue Src = getValue(SrcV);
// Create a virtual register, then update the virtual register.
- unsigned VReg =
+ Register VReg =
SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand());
// Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
// Chain can be getRoot or getControlRoot.
@@ -4132,8 +4150,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
"call visitLoadFromSwiftError when backend supports swifterror");
assert(!I.isVolatile() &&
- I.getMetadata(LLVMContext::MD_nontemporal) == nullptr &&
- I.getMetadata(LLVMContext::MD_invariant_load) == nullptr &&
+ !I.hasMetadata(LLVMContext::MD_nontemporal) &&
+ !I.hasMetadata(LLVMContext::MD_invariant_load) &&
"Support volatile, non temporal, invariant for load_from_swift_error");
const Value *SV = I.getOperand(0);
@@ -4209,7 +4227,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
auto MMOFlags = MachineMemOperand::MONone;
if (I.isVolatile())
MMOFlags |= MachineMemOperand::MOVolatile;
- if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
+ if (I.hasMetadata(LLVMContext::MD_nontemporal))
MMOFlags |= MachineMemOperand::MONonTemporal;
MMOFlags |= TLI.getMMOFlags(I);
@@ -4309,8 +4327,9 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
// are looking for. If first operand of the GEP is a splat vector - we
// extract the splat value and use it as a uniform base.
// In all other cases the function returns 'false'.
-static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
- SDValue &Scale, SelectionDAGBuilder* SDB) {
+static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index,
+ ISD::MemIndexType &IndexType, SDValue &Scale,
+ SelectionDAGBuilder *SDB) {
SelectionDAG& DAG = SDB->DAG;
LLVMContext &Context = *DAG.getContext();
@@ -4330,8 +4349,13 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
// Ensure all the other indices are 0.
for (unsigned i = 1; i < FinalIndex; ++i) {
- auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i));
- if (!C || !C->isZero())
+ auto *C = dyn_cast<Constant>(GEP->getOperand(i));
+ if (!C)
+ return false;
+ if (isa<VectorType>(C->getType()))
+ C = C->getSplatValue();
+ auto *CI = dyn_cast_or_null<ConstantInt>(C);
+ if (!CI || !CI->isZero())
return false;
}
@@ -4346,6 +4370,7 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
SDB->getCurSDLoc(), TLI.getPointerTy(DL));
Base = SDB->getValue(Ptr);
Index = SDB->getValue(IndexVal);
+ IndexType = ISD::SIGNED_SCALED;
if (!Index.getValueType().isVector()) {
unsigned GEPWidth = GEP->getType()->getVectorNumElements();
@@ -4373,9 +4398,11 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDValue Base;
SDValue Index;
+ ISD::MemIndexType IndexType;
SDValue Scale;
const Value *BasePtr = Ptr;
- bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
+ bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale,
+ this);
const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
MachineMemOperand *MMO = DAG.getMachineFunction().
@@ -4385,11 +4412,12 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
+ IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale };
SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
- Ops, MMO);
+ Ops, MMO, IndexType);
DAG.setRoot(Scatter);
setValue(&I, Scatter);
}
@@ -4476,9 +4504,11 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
SDValue Root = DAG.getRoot();
SDValue Base;
SDValue Index;
+ ISD::MemIndexType IndexType;
SDValue Scale;
const Value *BasePtr = Ptr;
- bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
+ bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale,
+ this);
bool ConstantMemory = false;
if (UniformBase && AA &&
AA->pointsToConstantMemory(
@@ -4500,11 +4530,12 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
+ IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
- Ops, MMO);
+ Ops, MMO, IndexType);
SDValue OutChain = Gather.getValue(1);
if (!ConstantMemory)
@@ -4628,7 +4659,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
auto Flags = MachineMemOperand::MOLoad;
if (I.isVolatile())
Flags |= MachineMemOperand::MOVolatile;
- if (I.getMetadata(LLVMContext::MD_invariant_load) != nullptr)
+ if (I.hasMetadata(LLVMContext::MD_invariant_load))
Flags |= MachineMemOperand::MOInvariant;
if (isDereferenceablePointer(I.getPointerOperand(), I.getType(),
DAG.getDataLayout()))
@@ -4645,9 +4676,27 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
AAMDNodes(), nullptr, SSID, Order);
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
- SDValue L =
- DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
- getValue(I.getPointerOperand()), MMO);
+
+ SDValue Ptr = getValue(I.getPointerOperand());
+
+ if (TLI.lowerAtomicLoadAsLoadSDNode(I)) {
+ // TODO: Once this is better exercised by tests, it should be merged with
+ // the normal path for loads to prevent future divergence.
+ SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO);
+ if (MemVT != VT)
+ L = DAG.getPtrExtOrTrunc(L, dl, VT);
+
+ setValue(&I, L);
+ SDValue OutChain = L.getValue(1);
+ if (!I.isUnordered())
+ DAG.setRoot(OutChain);
+ else
+ PendingLoads.push_back(OutChain);
+ return;
+ }
+
+ SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
+ Ptr, MMO);
SDValue OutChain = L.getValue(1);
if (MemVT != VT)
@@ -4686,9 +4735,17 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
SDValue Val = getValue(I.getValueOperand());
if (Val.getValueType() != MemVT)
Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
+ SDValue Ptr = getValue(I.getPointerOperand());
+ if (TLI.lowerAtomicStoreAsStoreSDNode(I)) {
+ // TODO: Once this is better exercised by tests, it should be merged with
+ // the normal path for stores to prevent future divergence.
+ SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO);
+ DAG.setRoot(S);
+ return;
+ }
SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
- getValue(I.getPointerOperand()), Val, MMO);
+ Ptr, Val, MMO);
DAG.setRoot(OutChain);
@@ -4731,8 +4788,22 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// Add all operands of the call to the operand list.
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
- SDValue Op = getValue(I.getArgOperand(i));
- Ops.push_back(Op);
+ const Value *Arg = I.getArgOperand(i);
+ if (!I.paramHasAttr(i, Attribute::ImmArg)) {
+ Ops.push_back(getValue(Arg));
+ continue;
+ }
+
+ // Use TargetConstant instead of a regular constant for immarg.
+ EVT VT = TLI.getValueType(*DL, Arg->getType(), true);
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) {
+ assert(CI->getBitWidth() <= 64 &&
+ "large intrinsic immediates not handled");
+ Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT));
+ } else {
+ Ops.push_back(
+ DAG.getTargetConstantFP(*cast<ConstantFP>(Arg), SDLoc(), VT));
+ }
}
SmallVector<EVT, 4> ValueVTs;
@@ -4749,10 +4820,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// This is target intrinsic that touches memory
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
- Result =
- DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
- MachinePointerInfo(Info.ptrVal, Info.offset),
- Info.align, Info.flags, Info.size, AAInfo);
+ Result = DAG.getMemIntrinsicNode(
+ Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
+ MachinePointerInfo(Info.ptrVal, Info.offset),
+ Info.align ? Info.align->value() : 0, Info.flags, Info.size, AAInfo);
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
@@ -4918,12 +4989,11 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
// Put the exponent in the right bit position for later addition to the
// final result:
//
- // #define LOG2OFe 1.4426950f
- // t0 = Op * LOG2OFe
+ // t0 = Op * log2(e)
// TODO: What fast-math-flags should be set here?
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
- getF32Constant(DAG, 0x3fb8aa3b, dl));
+ DAG.getConstantFP(numbers::log2ef, dl, MVT::f32));
return getLimitedPrecisionExp2(t0, dl, DAG);
}
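
The constant swap in expandExp is the identity exp(x) = 2^(x * log2(e)); the old hex literal 0x3fb8aa3b was simply 1.4426950f spelled as bits, now written as numbers::log2ef. A quick numeric check of the identity:

#include <cassert>
#include <cmath>

// Illustrative: exp(x) == exp2(x * log2(e)), the identity behind the
// t0 = Op * log2(e) step above.
int main() {
  const float Log2e = 1.4426950f; // the old 0x3fb8aa3b constant
  const float Xs[] = {-2.0f, 0.5f, 3.0f};
  for (float X : Xs) {
    float Direct = std::exp(X);
    float ViaExp2 = std::exp2(X * Log2e);
    assert(std::fabs(Direct - ViaExp2) < 1e-3f * std::fabs(Direct));
  }
  return 0;
}
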
@@ -4941,10 +5011,11 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
- // Scale the exponent by log(2) [0.69314718f].
+ // Scale the exponent by log(2).
SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
- SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
- getF32Constant(DAG, 0x3f317218, dl));
+ SDValue LogOfExponent =
+ DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ DAG.getConstantFP(numbers::ln2f, dl, MVT::f32));
// Get the significand and build it into a floating-point number with
// exponent of 1.
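
expandLog uses the same idea from the other direction: with x = m * 2^e, log(x) = e * log(2) + log(m), so the extracted exponent is scaled by numbers::ln2f (previously the literal 0x3f317218, about 0.69314718f) and the significand is handled separately. A sketch of the decomposition via frexp (which normalizes m into [0.5, 1) rather than [1, 2); the identity holds either way):

#include <cassert>
#include <cmath>

// Illustrative: log(x) = Exponent * ln(2) + log(Significand), the
// decomposition behind the GetExponent / ln2f scaling above.
int main() {
  const float Ln2 = 0.69314718f; // the old 0x3f317218 constant
  float X = 10.0f;
  int Exp;
  float M = std::frexp(X, &Exp); // X == M * 2^Exp, M in [0.5, 1)
  float Recomposed = Exp * Ln2 + std::log(M);
  assert(std::fabs(Recomposed - std::log(X)) < 1e-5f);
  return 0;
}
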
@@ -5311,19 +5382,32 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
}
-// getUnderlyingArgReg - Find underlying register used for a truncated or
-// bitcasted argument.
-static unsigned getUnderlyingArgReg(const SDValue &N) {
+// getUnderlyingArgRegs - Find underlying registers used for a truncated,
+// bitcasted, or split argument. Returns a list of <Register, size in bits>
+static void
+getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs,
+ const SDValue &N) {
switch (N.getOpcode()) {
- case ISD::CopyFromReg:
- return cast<RegisterSDNode>(N.getOperand(1))->getReg();
+ case ISD::CopyFromReg: {
+ SDValue Op = N.getOperand(1);
+ Regs.emplace_back(cast<RegisterSDNode>(Op)->getReg(),
+ Op.getValueType().getSizeInBits());
+ return;
+ }
case ISD::BITCAST:
case ISD::AssertZext:
case ISD::AssertSext:
case ISD::TRUNCATE:
- return getUnderlyingArgReg(N.getOperand(0));
+ getUnderlyingArgRegs(Regs, N.getOperand(0));
+ return;
+ case ISD::BUILD_PAIR:
+ case ISD::BUILD_VECTOR:
+ case ISD::CONCAT_VECTORS:
+ for (SDValue Op : N->op_values())
+ getUnderlyingArgRegs(Regs, Op);
+ return;
default:
- return 0;
+ return;
}
}
@@ -5412,11 +5496,16 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (FI != std::numeric_limits<int>::max())
Op = MachineOperand::CreateFI(FI);
+ SmallVector<std::pair<unsigned, unsigned>, 8> ArgRegsAndSizes;
if (!Op && N.getNode()) {
- unsigned Reg = getUnderlyingArgReg(N);
- if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
+ getUnderlyingArgRegs(ArgRegsAndSizes, N);
+ Register Reg;
+ if (ArgRegsAndSizes.size() == 1)
+ Reg = ArgRegsAndSizes.front().first;
+
+ if (Reg && Reg.isVirtual()) {
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned PR = RegInfo.getLiveInPhysReg(Reg);
+ Register PR = RegInfo.getLiveInPhysReg(Reg);
if (PR)
Reg = PR;
}
@@ -5436,29 +5525,42 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
if (!Op) {
+ // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg
+ auto splitMultiRegDbgValue
+ = [&](ArrayRef<std::pair<unsigned, unsigned>> SplitRegs) {
+ unsigned Offset = 0;
+ for (auto RegAndSize : SplitRegs) {
+ auto FragmentExpr = DIExpression::createFragmentExpression(
+ Expr, Offset, RegAndSize.second);
+ if (!FragmentExpr)
+ continue;
+ assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?");
+ FuncInfo.ArgDbgValues.push_back(
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false,
+ RegAndSize.first, Variable, *FragmentExpr));
+ Offset += RegAndSize.second;
+ }
+ };
+
// Check if ValueMap has reg number.
- DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ DenseMap<const Value *, unsigned>::const_iterator
+ VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
const auto &TLI = DAG.getTargetLoweringInfo();
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
V->getType(), getABIRegCopyCC(V));
if (RFV.occupiesMultipleRegs()) {
- unsigned Offset = 0;
- for (auto RegAndSize : RFV.getRegsAndSizes()) {
- Op = MachineOperand::CreateReg(RegAndSize.first, false);
- auto FragmentExpr = DIExpression::createFragmentExpression(
- Expr, Offset, RegAndSize.second);
- if (!FragmentExpr)
- continue;
- FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
- Op->getReg(), Variable, *FragmentExpr));
- Offset += RegAndSize.second;
- }
+ splitMultiRegDbgValue(RFV.getRegsAndSizes());
return true;
}
+
Op = MachineOperand::CreateReg(VMI->second, false);
IsIndirect = IsDbgDeclare;
+ } else if (ArgRegsAndSizes.size() > 1) {
+ // This was split due to the calling convention, and no virtual register
+ // mapping exists for the value.
+ splitMultiRegDbgValue(ArgRegsAndSizes);
+ return true;
}
}
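
The splitMultiRegDbgValue lambda describes a value spread across several registers with one DBG_VALUE per register, each carrying a DIExpression fragment at a running bit offset. A sketch of that bookkeeping on hypothetical register ids:

#include <cassert>
#include <cstdio>
#include <utility>
#include <vector>

// Illustrative: the running bit offset used when a debug value is
// split across registers, as in splitMultiRegDbgValue above.
int main() {
  // <register id, size in bits> for a 128-bit value in two 64-bit
  // registers (ids are hypothetical).
  std::vector<std::pair<unsigned, unsigned>> Regs = {{100, 64}, {101, 64}};
  unsigned Offset = 0;
  for (auto &RS : Regs) {
    // One fragment per register: fragment(Offset, Size).
    std::printf("vreg%u -> fragment at bit %u, %u bits\n",
                RS.first, Offset, RS.second);
    Offset += RS.second;
  }
  assert(Offset == 128); // the fragments exactly cover the value
  return 0;
}
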
@@ -5468,8 +5570,10 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
IsIndirect = (Op->isReg()) ? IsIndirect : true;
+ if (IsIndirect)
+ Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false,
*Op, Variable, Expr));
return true;
@@ -5554,11 +5658,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
case Intrinsic::sponentry:
setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl,
- TLI.getPointerTy(DAG.getDataLayout())));
+ TLI.getFrameIndexTy(DAG.getDataLayout())));
return;
case Intrinsic::frameaddress:
setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
- TLI.getPointerTy(DAG.getDataLayout()),
+ TLI.getFrameIndexTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::read_register: {
@@ -5888,65 +5992,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::masked_compressstore:
visitMaskedStore(I, true /* IsCompressing */);
return;
- case Intrinsic::x86_mmx_pslli_w:
- case Intrinsic::x86_mmx_pslli_d:
- case Intrinsic::x86_mmx_pslli_q:
- case Intrinsic::x86_mmx_psrli_w:
- case Intrinsic::x86_mmx_psrli_d:
- case Intrinsic::x86_mmx_psrli_q:
- case Intrinsic::x86_mmx_psrai_w:
- case Intrinsic::x86_mmx_psrai_d: {
- SDValue ShAmt = getValue(I.getArgOperand(1));
- if (isa<ConstantSDNode>(ShAmt)) {
- visitTargetIntrinsic(I, Intrinsic);
- return;
- }
- unsigned NewIntrinsic = 0;
- EVT ShAmtVT = MVT::v2i32;
- switch (Intrinsic) {
- case Intrinsic::x86_mmx_pslli_w:
- NewIntrinsic = Intrinsic::x86_mmx_psll_w;
- break;
- case Intrinsic::x86_mmx_pslli_d:
- NewIntrinsic = Intrinsic::x86_mmx_psll_d;
- break;
- case Intrinsic::x86_mmx_pslli_q:
- NewIntrinsic = Intrinsic::x86_mmx_psll_q;
- break;
- case Intrinsic::x86_mmx_psrli_w:
- NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
- break;
- case Intrinsic::x86_mmx_psrli_d:
- NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
- break;
- case Intrinsic::x86_mmx_psrli_q:
- NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
- break;
- case Intrinsic::x86_mmx_psrai_w:
- NewIntrinsic = Intrinsic::x86_mmx_psra_w;
- break;
- case Intrinsic::x86_mmx_psrai_d:
- NewIntrinsic = Intrinsic::x86_mmx_psra_d;
- break;
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- }
-
- // The vector shift intrinsics with scalars uses 32b shift amounts but
- // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits
- // to be zero.
- // We must do this early because v2i32 is not a legal type.
- SDValue ShOps[2];
- ShOps[0] = ShAmt;
- ShOps[1] = DAG.getConstant(0, sdl, MVT::i32);
- ShAmt = DAG.getBuildVector(ShAmtVT, sdl, ShOps);
- EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
- ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt);
- Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT,
- DAG.getConstant(NewIntrinsic, sdl, MVT::i32),
- getValue(I.getArgOperand(0)), ShAmt);
- setValue(&I, Res);
- return;
- }
case Intrinsic::powi:
setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG));
@@ -6063,6 +6108,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
case Intrinsic::experimental_constrained_fma:
+ case Intrinsic::experimental_constrained_fptosi:
+ case Intrinsic::experimental_constrained_fptoui:
case Intrinsic::experimental_constrained_fptrunc:
case Intrinsic::experimental_constrained_fpext:
case Intrinsic::experimental_constrained_sqrt:
@@ -6075,12 +6122,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::experimental_constrained_log:
case Intrinsic::experimental_constrained_log10:
case Intrinsic::experimental_constrained_log2:
+ case Intrinsic::experimental_constrained_lrint:
+ case Intrinsic::experimental_constrained_llrint:
case Intrinsic::experimental_constrained_rint:
case Intrinsic::experimental_constrained_nearbyint:
case Intrinsic::experimental_constrained_maxnum:
case Intrinsic::experimental_constrained_minnum:
case Intrinsic::experimental_constrained_ceil:
case Intrinsic::experimental_constrained_floor:
+ case Intrinsic::experimental_constrained_lround:
+ case Intrinsic::experimental_constrained_llround:
case Intrinsic::experimental_constrained_round:
case Intrinsic::experimental_constrained_trunc:
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
@@ -6272,6 +6323,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
Op3));
return;
}
+ case Intrinsic::umul_fix_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ setValue(&I, DAG.getNode(ISD::UMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2,
+ Op3));
+ return;
+ }
case Intrinsic::stacksave: {
SDValue Op = getRoot();
Res = DAG.getNode(
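
The new umul_fix_sat case maps the intrinsic straight onto ISD::UMULFIXSAT. Semantically (assuming the LangRef definition of llvm.umul.fix.sat) this is an unsigned fixed-point multiply: form the full-width product, shift right by the scale, and clamp to the unsigned maximum instead of wrapping. A scalar model:

#include <cassert>
#include <cstdint>

// Illustrative model of unsigned saturating fixed-point multiply on
// i32 with scale N: (A * B) >> N, clamped to UINT32_MAX. Assumes the
// documented LangRef semantics of llvm.umul.fix.sat.
uint32_t umulFixSat32(uint32_t A, uint32_t B, unsigned Scale) {
  uint64_t Prod = (uint64_t)A * B; // exact 64-bit product
  uint64_t Shifted = Prod >> Scale;
  return Shifted > UINT32_MAX ? UINT32_MAX : (uint32_t)Shifted;
}

int main() {
  // Q16.16: 1.5 * 2.0 == 3.0.
  assert(umulFixSat32(3u << 15, 2u << 16, 16) == 3u << 16);
  // Overflow saturates instead of wrapping.
  assert(umulFixSat32(0xFFFFFFFFu, 0xFFFFFFFFu, 16) == 0xFFFFFFFFu);
  return 0;
}
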
@@ -6347,29 +6406,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.setRoot(Res);
return;
}
- case Intrinsic::objectsize: {
- // If we don't know by now, we're never going to know.
- ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
-
- assert(CI && "Non-constant type in __builtin_object_size?");
-
- SDValue Arg = getValue(I.getCalledValue());
- EVT Ty = Arg.getValueType();
-
- if (CI->isZero())
- Res = DAG.getConstant(-1ULL, sdl, Ty);
- else
- Res = DAG.getConstant(0, sdl, Ty);
-
- setValue(&I, Res);
- return;
- }
+ case Intrinsic::objectsize:
+ llvm_unreachable("llvm.objectsize.* should have been lowered already");
case Intrinsic::is_constant:
- // If this wasn't constant-folded away by now, then it's not a
- // constant.
- setValue(&I, DAG.getConstant(0, sdl, MVT::i1));
- return;
+ llvm_unreachable("llvm.is.constant.* should have been lowered already");
case Intrinsic::annotation:
case Intrinsic::ptr_annotation:
@@ -6818,6 +6859,17 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, Val);
return;
}
+ case Intrinsic::ptrmask: {
+ SDValue Ptr = getValue(I.getOperand(0));
+ SDValue Const = getValue(I.getOperand(1));
+
+ EVT DestVT =
+ EVT(DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
+
+ setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), DestVT, Ptr,
+ DAG.getZExtOrTrunc(Const, getCurSDLoc(), DestVT)));
+ return;
+ }
}
}
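
The llvm.ptrmask lowering above is just an AND of the pointer with the zero-extended or truncated mask; the common use is rounding a pointer down to an alignment boundary. A sketch of that AND:

#include <cassert>
#include <cstdint>

// Illustrative: the AND the ptrmask lowering emits, applied to the
// usual case of aligning a pointer down to a 16-byte boundary.
int main() {
  char Buf[64];
  uintptr_t P = reinterpret_cast<uintptr_t>(&Buf[37]);
  uintptr_t Mask = ~(uintptr_t)15; // clear the low 4 bits
  uintptr_t Aligned = P & Mask;    // ptrmask(P, Mask)
  assert(Aligned % 16 == 0 && Aligned <= P);
  return 0;
}
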
@@ -6845,6 +6897,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case Intrinsic::experimental_constrained_fma:
Opcode = ISD::STRICT_FMA;
break;
+ case Intrinsic::experimental_constrained_fptosi:
+ Opcode = ISD::STRICT_FP_TO_SINT;
+ break;
+ case Intrinsic::experimental_constrained_fptoui:
+ Opcode = ISD::STRICT_FP_TO_UINT;
+ break;
case Intrinsic::experimental_constrained_fptrunc:
Opcode = ISD::STRICT_FP_ROUND;
break;
@@ -6881,6 +6939,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case Intrinsic::experimental_constrained_log2:
Opcode = ISD::STRICT_FLOG2;
break;
+ case Intrinsic::experimental_constrained_lrint:
+ Opcode = ISD::STRICT_LRINT;
+ break;
+ case Intrinsic::experimental_constrained_llrint:
+ Opcode = ISD::STRICT_LLRINT;
+ break;
case Intrinsic::experimental_constrained_rint:
Opcode = ISD::STRICT_FRINT;
break;
@@ -6899,6 +6963,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case Intrinsic::experimental_constrained_floor:
Opcode = ISD::STRICT_FFLOOR;
break;
+ case Intrinsic::experimental_constrained_lround:
+ Opcode = ISD::STRICT_LROUND;
+ break;
+ case Intrinsic::experimental_constrained_llround:
+ Opcode = ISD::STRICT_LLROUND;
+ break;
case Intrinsic::experimental_constrained_round:
Opcode = ISD::STRICT_FROUND;
break;
@@ -7102,7 +7172,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
if (SwiftErrorVal && TLI.supportSwiftError()) {
// Get the last element of InVals.
SDValue Src = CLI.InVals.back();
- unsigned VReg = SwiftError.getOrCreateVRegDefAt(
+ Register VReg = SwiftError.getOrCreateVRegDefAt(
CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal);
SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
DAG.setRoot(CopyNode);
@@ -8021,6 +8091,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(T, SDValue());
+ if (T.ConstraintType == TargetLowering::C_Immediate &&
+ OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand))
+ // We've delayed emitting a diagnostic like the "n" constraint because
+ // inlining could turn the operand into an integer constant later.
+ return emitInlineAsmError(
+ CS, "constraint '" + Twine(T.ConstraintCode) + "' expects an "
+ "integer constant expression");
+
ExtraInfo.update(T);
}
@@ -8105,7 +8183,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
- (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ ((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) &&
OpInfo.isIndirect)) {
unsigned ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
@@ -8119,13 +8198,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
MVT::i32));
AsmNodeOperands.push_back(OpInfo.CallOperand);
break;
- } else if ((OpInfo.ConstraintType == TargetLowering::C_Other &&
+ } else if (((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) &&
!OpInfo.isIndirect) ||
OpInfo.ConstraintType == TargetLowering::C_Register ||
OpInfo.ConstraintType == TargetLowering::C_RegisterClass) {
// Otherwise, this outputs to a register (directly for C_Register /
- // C_RegisterClass, and a target-defined fashion for C_Other). Find a
- // register that we can use.
+ // C_RegisterClass, and a target-defined fashion for
+ // C_Immediate/C_Other). Find a register that we can use.
if (OpInfo.AssignedRegs.Regs.empty()) {
emitInlineAsmError(
CS, "couldn't allocate output register for constraint '" +
@@ -8205,15 +8285,24 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
// Treat indirect 'X' constraint as memory.
- if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ if ((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) &&
OpInfo.isIndirect)
OpInfo.ConstraintType = TargetLowering::C_Memory;
- if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+ if (OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) {
std::vector<SDValue> Ops;
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
Ops, DAG);
if (Ops.empty()) {
+ if (OpInfo.ConstraintType == TargetLowering::C_Immediate)
+ if (isa<ConstantSDNode>(InOperandVal)) {
+ emitInlineAsmError(CS, "value out of range for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ return;
+ }
+
emitInlineAsmError(CS, "invalid operand for inline asm constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
@@ -8250,7 +8339,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
- OpInfo.ConstraintType == TargetLowering::C_Register) &&
+ OpInfo.ConstraintType == TargetLowering::C_Register ||
+ OpInfo.ConstraintType == TargetLowering::C_Immediate) &&
"Unknown constraint type!");
// TODO: Support this.
@@ -8356,6 +8446,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
Val = OpInfo.AssignedRegs.getCopyFromRegs(
DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction());
break;
+ case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
OpInfo, DAG);
@@ -9018,7 +9109,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// Certain targets (such as MIPS), may have a different ABI alignment
// for a type depending on the context. Give the target a chance to
// specify the alignment it wants.
- unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL);
+ const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL));
if (Args[i].Ty->isPointerTy()) {
Flags.setPointer();
@@ -9073,7 +9164,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
FrameAlign = Args[i].Alignment;
else
FrameAlign = getByValTypeAlignment(ElementTy, DL);
- Flags.setByValAlign(FrameAlign);
+ Flags.setByValAlign(Align(FrameAlign));
}
if (Args[i].IsNest)
Flags.setNest();
@@ -9129,7 +9220,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0) {
- MyFlags.Flags.setOrigAlign(1);
+ MyFlags.Flags.setOrigAlign(Align::None());
if (j == NumParts - 1)
MyFlags.Flags.setSplitEnd();
}
@@ -9259,7 +9350,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
assert((Op.getOpcode() != ISD::CopyFromReg ||
cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
"Copy from a reg to the same reg!");
- assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
+ assert(!Register::isPhysicalRegister(Reg) && "Is a physreg");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If this is an InlineAsm we have to match the registers required, not the
@@ -9516,8 +9607,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Certain targets (such as MIPS), may have a different ABI alignment
// for a type depending on the context. Give the target a chance to
// specify the alignment it wants.
- unsigned OriginalAlignment =
- TLI->getABIAlignmentForCallingConv(ArgTy, DL);
+ const Align OriginalAlignment(
+ TLI->getABIAlignmentForCallingConv(ArgTy, DL));
if (Arg.getType()->isPointerTy()) {
Flags.setPointer();
@@ -9577,7 +9668,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
FrameAlign = Arg.getParamAlignment();
else
FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL);
- Flags.setByValAlign(FrameAlign);
+ Flags.setByValAlign(Align(FrameAlign));
}
if (Arg.hasAttribute(Attribute::Nest))
Flags.setNest();
@@ -9586,6 +9677,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Flags.setOrigAlign(OriginalAlignment);
if (ArgCopyElisionCandidates.count(&Arg))
Flags.setCopyElisionCandidate();
+ if (Arg.hasAttribute(Attribute::Returned))
+ Flags.setReturned();
MVT RegisterVT = TLI->getRegisterTypeForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
@@ -9598,7 +9691,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MyFlags.Flags.setSplit();
// if it isn't first piece, alignment must be 1
else if (i > 0) {
- MyFlags.Flags.setOrigAlign(1);
+ MyFlags.Flags.setOrigAlign(Align::None());
if (i == NumRegs - 1)
MyFlags.Flags.setSplitEnd();
}
@@ -9650,7 +9743,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MachineFunction& MF = SDB->DAG.getMachineFunction();
MachineRegisterInfo& RegInfo = MF.getRegInfo();
- unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
+ Register SRetReg =
+ RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
FuncInfo->DemoteRegister = SRetReg;
NewRoot =
SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue);
@@ -9748,10 +9842,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
+ // Analyses past this point are naive and don't expect an assertion.
+ if (Res.getOpcode() == ISD::AssertZext)
+ Res = Res.getOperand(0);
+
// Update the SwiftErrorVRegDefMap.
if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(),
Reg);
}
@@ -9763,7 +9861,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// FIXME: This isn't very clean... it would be nice to make this more
// general.
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
FuncInfo->ValueMap[&Arg] = Reg;
continue;
}
@@ -10087,8 +10185,6 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
break;
}
case CC_BitTests: {
- // FIXME: If Fallthrough is unreachable, skip the range check.
-
// FIXME: Optimize away range check based on pivot comparisons.
BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
@@ -10109,6 +10205,11 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
BTB->DefaultProb -= DefaultProb / 2;
}
+ if (FallthroughUnreachable) {
+ // Skip the range check if the fallthrough block is unreachable.
+ BTB->OmitRangeCheck = true;
+ }
+
// If we're in the right place, emit the bit test header right now.
if (CurMBB == SwitchMBB) {
visitBitTestHeader(*BTB, SwitchMBB);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 0072e33f23b7..bfcf30b430b6 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -426,7 +426,7 @@ public:
SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
SwiftErrorValueTracking &swifterror, CodeGenOpt::Level ol)
: SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag),
- SL(make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo),
+ SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo),
SwiftError(swifterror) {}
void init(GCFunctionInfo *gfi, AliasAnalysis *AA,
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index da3049881d31..bc10f7621239 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -280,6 +280,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
+ case ISD::SPLAT_VECTOR: return "splat_vector";
case ISD::CARRY_FALSE: return "carry_false";
case ISD::ADDC: return "addc";
case ISD::ADDE: return "adde";
@@ -305,6 +306,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SMULFIX: return "smulfix";
case ISD::SMULFIXSAT: return "smulfixsat";
case ISD::UMULFIX: return "umulfix";
+ case ISD::UMULFIXSAT: return "umulfixsat";
// Conversion operators.
case ISD::SIGN_EXTEND: return "sign_extend";
@@ -318,22 +320,27 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FP_ROUND: return "fp_round";
case ISD::STRICT_FP_ROUND: return "strict_fp_round";
case ISD::FLT_ROUNDS_: return "flt_rounds";
- case ISD::FP_ROUND_INREG: return "fp_round_inreg";
case ISD::FP_EXTEND: return "fp_extend";
case ISD::STRICT_FP_EXTEND: return "strict_fp_extend";
case ISD::SINT_TO_FP: return "sint_to_fp";
case ISD::UINT_TO_FP: return "uint_to_fp";
case ISD::FP_TO_SINT: return "fp_to_sint";
+ case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint";
case ISD::FP_TO_UINT: return "fp_to_uint";
+ case ISD::STRICT_FP_TO_UINT: return "strict_fp_to_uint";
case ISD::BITCAST: return "bitcast";
case ISD::ADDRSPACECAST: return "addrspacecast";
case ISD::FP16_TO_FP: return "fp16_to_fp";
case ISD::FP_TO_FP16: return "fp_to_fp16";
case ISD::LROUND: return "lround";
+ case ISD::STRICT_LROUND: return "strict_lround";
case ISD::LLROUND: return "llround";
+ case ISD::STRICT_LLROUND: return "strict_llround";
case ISD::LRINT: return "lrint";
+ case ISD::STRICT_LRINT: return "strict_lrint";
case ISD::LLRINT: return "llrint";
+ case ISD::STRICT_LLRINT: return "strict_llrint";
// Control flow instructions
case ISD::BR: return "br";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index bdf9f2c166e1..1f07a241a824 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -434,9 +435,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TII = MF->getSubtarget().getInstrInfo();
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
- LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn);
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
- ORE = make_unique<OptimizationRemarkEmitter>(&Fn);
+ ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn);
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
@@ -524,8 +525,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
To = J->second;
}
// Make sure the new register has a sufficiently constrained register class.
- if (TargetRegisterInfo::isVirtualRegister(From) &&
- TargetRegisterInfo::isVirtualRegister(To))
+ if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To))
MRI.constrainRegClass(To, MRI.getRegClass(From));
// Replace it.
@@ -572,7 +572,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
bool hasFI = MI->getOperand(0).isFI();
Register Reg =
hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
EntryMBB->insert(EntryMBB->begin(), MI);
else {
MachineInstr *Def = RegInfo->getVRegDef(Reg);
@@ -582,7 +582,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
Def->getParent()->insert(std::next(InsertPos), MI);
} else
LLVM_DEBUG(dbgs() << "Dropping debug info for dead vreg"
- << TargetRegisterInfo::virtReg2Index(Reg) << "\n");
+ << Register::virtReg2Index(Reg) << "\n");
}
// If Reg is live-in then update debug info to track its copy in a vreg.
@@ -671,8 +671,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
To = J->second;
}
// Make sure the new register has a sufficiently constrained register class.
- if (TargetRegisterInfo::isVirtualRegister(From) &&
- TargetRegisterInfo::isVirtualRegister(To))
+ if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To))
MRI.constrainRegClass(To, MRI.getRegClass(From));
// Replace it.
@@ -760,7 +759,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
continue;
unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
- if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+ if (!Register::isVirtualRegister(DestReg))
continue;
// Ignore non-integer values.
@@ -1652,9 +1651,8 @@ static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
// Make sure that the copy dest is not a vreg when the copy source is a
// physical register.
- if (!OPI2->isReg() ||
- (!TargetRegisterInfo::isPhysicalRegister(OPI->getReg()) &&
- TargetRegisterInfo::isPhysicalRegister(OPI2->getReg())))
+ if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) &&
+ Register::isPhysicalRegister(OPI2->getReg())))
return false;
return true;
@@ -2234,9 +2232,9 @@ void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) {
SDLoc dl(Op);
MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
- unsigned Reg =
+ Register Reg =
TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0),
- *CurDAG);
+ CurDAG->getMachineFunction());
SDValue New = CurDAG->getCopyFromReg(
Op->getOperand(0), dl, Reg, Op->getValueType(0));
New->setNodeId(-1);
@@ -2248,9 +2246,9 @@ void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) {
SDLoc dl(Op);
MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
- unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(),
+ Register Reg = TLI->getRegisterByName(RegStr->getString().data(),
Op->getOperand(2).getValueType(),
- *CurDAG);
+ CurDAG->getMachineFunction());
SDValue New = CurDAG->getCopyToReg(
Op->getOperand(0), dl, Reg, Op->getOperand(2));
New->setNodeId(-1);
@@ -3323,10 +3321,13 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
continue;
}
- case OPC_EmitCopyToReg: {
+ case OPC_EmitCopyToReg:
+ case OPC_EmitCopyToReg2: {
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg");
unsigned DestPhysReg = MatcherTable[MatcherIndex++];
+ if (Opcode == OPC_EmitCopyToReg2)
+ DestPhysReg |= MatcherTable[MatcherIndex++] << 8;
if (!InputChain.getNode())
InputChain = CurDAG->getEntryNode();
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 395e9a8a4fc5..fad98b6f50dc 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -378,7 +378,6 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
// We use TargetFrameIndex so that isel will not select it into LEA
Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy());
-#ifndef NDEBUG
// Right now we always allocate spill slots that are of the same
// size as the value we're about to spill (the size of spillee can
// vary since we spill vectors of pointers too). At some point we
@@ -387,12 +386,18 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
assert((MFI.getObjectSize(Index) * 8) == Incoming.getValueSizeInBits() &&
"Bad spill: stack slot does not match!");
-#endif
+  // Note: Using the alignment of the spill slot (rather than the ABI or
+  // preferred alignment) is required for correctness when dealing with spill
+  // slots with preferred alignments larger than the frame alignment.
auto &MF = Builder.DAG.getMachineFunction();
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
+ auto *StoreMMO =
+ MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+ MFI.getObjectSize(Index),
+ MFI.getObjectAlignment(Index));
Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc,
- PtrInfo);
+ StoreMMO);
MMO = getMachineMemOperand(MF, *cast<FrameIndexSDNode>(Loc));
@@ -1011,20 +1016,27 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
return;
}
- SDValue SpillSlot =
- DAG.getTargetFrameIndex(*DerivedPtrLocation, getFrameIndexTy());
+ unsigned Index = *DerivedPtrLocation;
+ SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy());
// Note: We know all of these reloads are independent, but don't bother to
// exploit that chain wise. DAGCombine will happily do so as needed, so
// doing it here would be a small compile time win at most.
SDValue Chain = getRoot();
- SDValue SpillLoad =
- DAG.getLoad(DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
- Relocate.getType()),
- getCurSDLoc(), Chain, SpillSlot,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
- *DerivedPtrLocation));
+ auto &MF = DAG.getMachineFunction();
+ auto &MFI = MF.getFrameInfo();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
+ auto *LoadMMO =
+ MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+ MFI.getObjectSize(Index),
+ MFI.getObjectAlignment(Index));
+
+ auto LoadVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ Relocate.getType());
+
+ SDValue SpillLoad = DAG.getLoad(LoadVT, getCurSDLoc(), Chain,
+ SpillSlot, LoadMMO);
DAG.setRoot(SpillLoad.getValue(1));
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b260cd91d468..9ab1324533f1 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -37,7 +36,7 @@ using namespace llvm;
/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
- : TargetLoweringBase(tm) {}
+ : TargetLoweringBase(tm) {}
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
@@ -80,7 +79,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
const CCValAssign &ArgLoc = ArgLocs[I];
if (!ArgLoc.isRegLoc())
continue;
- unsigned Reg = ArgLoc.getLocReg();
+ Register Reg = ArgLoc.getLocReg();
// Only look at callee saved registers.
if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
continue;
@@ -121,19 +120,25 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
- ArrayRef<SDValue> Ops, bool isSigned,
- const SDLoc &dl, bool doesNotReturn,
- bool isReturnValueUsed,
- bool isPostTypeLegalization) const {
+ ArrayRef<SDValue> Ops,
+ MakeLibCallOptions CallOptions,
+ const SDLoc &dl) const {
TargetLowering::ArgListTy Args;
Args.reserve(Ops.size());
TargetLowering::ArgListEntry Entry;
- for (SDValue Op : Ops) {
- Entry.Node = Op;
+ for (unsigned i = 0; i < Ops.size(); ++i) {
+ SDValue NewOp = Ops[i];
+ Entry.Node = NewOp;
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
- Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
+ Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
+ CallOptions.IsSExt);
+ Entry.IsZExt = !Entry.IsSExt;
+
+ if (CallOptions.IsSoften &&
+ !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
+ Entry.IsSExt = Entry.IsZExt = false;
+ }
Args.push_back(Entry);
}
@@ -144,15 +149,22 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
- bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned);
+ bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
+ bool zeroExtend = !signExtend;
+
+ if (CallOptions.IsSoften &&
+ !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
+ signExtend = zeroExtend = false;
+ }
+
CLI.setDebugLoc(dl)
.setChain(DAG.getEntryNode())
.setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
- .setNoReturn(doesNotReturn)
- .setDiscardResult(!isReturnValueUsed)
- .setIsPostTypeLegalization(isPostTypeLegalization)
+ .setNoReturn(CallOptions.DoesNotReturn)
+ .setDiscardResult(!CallOptions.IsReturnValueUsed)
+ .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
.setSExtResult(signExtend)
- .setZExtResult(!signExtend);
+ .setZExtResult(zeroExtend);
return LowerCallTo(CLI);
}
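makeLibCall now carries its signedness and soften-float context in a MakeLibCallOptions bundle instead of loose booleans. A hypothetical call site under the new signature (a minimal sketch, not part of this patch; it assumes a setSExt setter matching the IsSExt field used above):

  SDValue Ops[2] = {LHS, RHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  CallOptions.setSExt(true); // replaces the old 'isSigned' parameter
  std::pair<SDValue, SDValue> Res =
      TLI.makeLibCall(DAG, RTLIB::SDIV_I64, MVT::i64, Ops, CallOptions, dl);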
@@ -263,7 +275,8 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode,
- const SDLoc &dl) const {
+ const SDLoc &dl, const SDValue OldLHS,
+ const SDValue OldRHS) const {
assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
&& "Unsupported setcc type!");
@@ -365,8 +378,11 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
  // Use the target-specific return value for comparison lib calls.
EVT RetVT = getCmpLibcallReturnType();
SDValue Ops[2] = {NewLHS, NewRHS};
- NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/,
- dl).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { OldLHS.getValueType(),
+ OldRHS.getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
+ NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first;
NewRHS = DAG.getConstant(0, dl, RetVT);
CCCode = getCmpLibcallCC(LC1);
@@ -378,8 +394,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
ISD::SETCC, dl,
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
NewLHS, NewRHS, DAG.getCondCode(CCCode));
- NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false/*sign irrelevant*/,
- dl).first;
+ NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first;
NewLHS = DAG.getNode(
ISD::SETCC, dl,
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
@@ -564,6 +579,170 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
AssumeSingleUse);
}
+// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
+// TODO: Under what circumstances can we create nodes? Constant folding?
+SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ SelectionDAG &DAG, unsigned Depth) const {
+ // Limit search depth.
+ if (Depth >= SelectionDAG::MaxRecursionDepth)
+ return SDValue();
+
+ // Ignore UNDEFs.
+ if (Op.isUndef())
+ return SDValue();
+
+ // Not demanding any bits/elts from Op.
+ if (DemandedBits == 0 || DemandedElts == 0)
+ return DAG.getUNDEF(Op.getValueType());
+
+ unsigned NumElts = DemandedElts.getBitWidth();
+ KnownBits LHSKnown, RHSKnown;
+ switch (Op.getOpcode()) {
+ case ISD::BITCAST: {
+ SDValue Src = peekThroughBitcasts(Op.getOperand(0));
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Op.getValueType();
+ unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
+ unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
+
+ if (NumSrcEltBits == NumDstEltBits)
+ if (SDValue V = SimplifyMultipleUseDemandedBits(
+ Src, DemandedBits, DemandedElts, DAG, Depth + 1))
+ return DAG.getBitcast(DstVT, V);
+
+ // TODO - bigendian once we have test coverage.
+ if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
+ DAG.getDataLayout().isLittleEndian()) {
+ unsigned Scale = NumDstEltBits / NumSrcEltBits;
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ for (unsigned i = 0; i != Scale; ++i) {
+ unsigned Offset = i * NumSrcEltBits;
+ APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
+ if (!Sub.isNullValue()) {
+ DemandedSrcBits |= Sub;
+ for (unsigned j = 0; j != NumElts; ++j)
+ if (DemandedElts[j])
+ DemandedSrcElts.setBit((j * Scale) + i);
+ }
+ }
+
+ if (SDValue V = SimplifyMultipleUseDemandedBits(
+ Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
+ return DAG.getBitcast(DstVT, V);
+ }
+
+ // TODO - bigendian once we have test coverage.
+ if ((NumSrcEltBits % NumDstEltBits) == 0 &&
+ DAG.getDataLayout().isLittleEndian()) {
+ unsigned Scale = NumSrcEltBits / NumDstEltBits;
+ unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned Offset = (i % Scale) * NumDstEltBits;
+ DemandedSrcBits.insertBits(DemandedBits, Offset);
+ DemandedSrcElts.setBit(i / Scale);
+ }
+
+ if (SDValue V = SimplifyMultipleUseDemandedBits(
+ Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
+ return DAG.getBitcast(DstVT, V);
+ }
+
+ break;
+ }
+ case ISD::AND: {
+ LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and' in this
+ // context.
+ if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
+ return Op.getOperand(0);
+ if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
+ return Op.getOperand(1);
+ break;
+ }
+ case ISD::OR: {
+ LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other. These bits cannot contribute to the result of the 'or' in this
+ // context.
+ if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
+ return Op.getOperand(0);
+ if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
+ return Op.getOperand(1);
+ break;
+ }
+ case ISD::XOR: {
+ LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other.
+ if (DemandedBits.isSubsetOf(RHSKnown.Zero))
+ return Op.getOperand(0);
+ if (DemandedBits.isSubsetOf(LHSKnown.Zero))
+ return Op.getOperand(1);
+ break;
+ }
+ case ISD::SIGN_EXTEND_INREG: {
+ // If none of the extended bits are demanded, eliminate the sextinreg.
+ EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits())
+ return Op.getOperand(0);
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ // If we don't demand the inserted element, return the base vector.
+ SDValue Vec = Op.getOperand(0);
+ auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ EVT VecVT = Vec.getValueType();
+ if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
+ !DemandedElts[CIdx->getZExtValue()])
+ return Vec;
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
+
+ // If all the demanded elts are from one operand and are inline,
+ // then we can use the operand directly.
+ bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int M = ShuffleMask[i];
+ if (M < 0 || !DemandedElts[i])
+ continue;
+ AllUndef = false;
+ IdentityLHS &= (M == (int)i);
+ IdentityRHS &= ((M - NumElts) == i);
+ }
+
+ if (AllUndef)
+ return DAG.getUNDEF(Op.getValueType());
+ if (IdentityLHS)
+ return Op.getOperand(0);
+ if (IdentityRHS)
+ return Op.getOperand(1);
+ break;
+ }
+ default:
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
+ if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
+ Op, DemandedBits, DemandedElts, DAG, Depth))
+ return V;
+ break;
+ }
+ return SDValue();
+}
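To see why the AND case may return an operand outright: for every demanded bit, the 'and' is transparent when the other side is known-one or this side is known-zero. A scalar sketch of that test, with plain masks standing in for KnownBits (illustration only):

  #include <cstdint>
  // For each demanded bit of (and L, R): if R's bit is known 1 the result
  // equals L's bit; if L's bit is known 0 the result is 0, which is again
  // L's bit. Either way L can stand in for the whole 'and'.
  bool andIsTransparentForLHS(uint32_t Demanded, uint32_t LKnownZero,
                              uint32_t RKnownOne) {
    return (Demanded & ~(LKnownZero | RKnownOne)) == 0;
  }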
+
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
@@ -619,12 +798,15 @@ bool TargetLowering::SimplifyDemandedBits(
} else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
// Not demanding any bits/elts from Op.
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
- } else if (Depth == 6) { // Limit search depth.
+ } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
+ // Limit search depth.
return false;
}
KnownBits Known2, KnownOut;
switch (Op.getOpcode()) {
+ case ISD::TargetConstant:
+ llvm_unreachable("Can't simplify this node");
case ISD::SCALAR_TO_VECTOR: {
if (!DemandedElts[0])
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
@@ -728,6 +910,21 @@ bool TargetLowering::SimplifyDemandedBits(
}
break;
}
+ case ISD::EXTRACT_SUBVECTOR: {
+ // If index isn't constant, assume we need all the source vector elements.
+ SDValue Src = Op.getOperand(0);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt SrcElts = APInt::getAllOnesValue(NumSrcElts);
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
+ // Offset the demanded elts by the subvector index.
+ uint64_t Idx = SubIdx->getZExtValue();
+ SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ }
+ if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1))
+ return true;
+ break;
+ }
case ISD::CONCAT_VECTORS: {
Known.Zero.setAllBits();
Known.One.setAllBits();
@@ -773,22 +970,37 @@ bool TargetLowering::SimplifyDemandedBits(
}
if (!!DemandedLHS || !!DemandedRHS) {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
Known.Zero.setAllBits();
Known.One.setAllBits();
if (!!DemandedLHS) {
- if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits, DemandedLHS,
- Known2, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
+ Depth + 1))
return true;
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
if (!!DemandedRHS) {
- if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedRHS,
- Known2, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
+ Depth + 1))
return true;
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
+ return TLO.CombineTo(Op, NewOp);
+ }
}
break;
}
@@ -834,6 +1046,20 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
@@ -869,6 +1095,20 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
@@ -901,6 +1141,20 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
if (DemandedBits.isSubsetOf(Known.Zero))
@@ -1034,7 +1288,7 @@ bool TargetLowering::SimplifyDemandedBits(
// out) are never demanded.
// TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::SRL) {
- if ((DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
if (ConstantSDNode *SA2 =
isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
if (SA2->getAPIntValue().ult(BitWidth)) {
@@ -1141,7 +1395,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (Op0.getOpcode() == ISD::SHL) {
if (ConstantSDNode *SA2 =
isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
- if ((DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
+ if (!DemandedBits.intersects(
+ APInt::getHighBitsSet(BitWidth, ShAmt))) {
if (SA2->getAPIntValue().ult(BitWidth)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SRL;
@@ -1479,6 +1734,11 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
Known = Known.trunc(BitWidth);
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
+ Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
+
// If the input is only used by this truncate, see if we can shrink it based
// on the known demanded bits.
if (Src.getNode()->hasOneUse()) {
@@ -1595,9 +1855,7 @@ bool TargetLowering::SimplifyDemandedBits(
// Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
// Demand the elt/bit if any of the original elts/bits are demanded.
// TODO - bigendian once we have test coverage.
- // TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support.
- if (SrcVT.isVector() && NumSrcEltBits > 1 &&
- (BitWidth % NumSrcEltBits) == 0 &&
+ if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = BitWidth / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
@@ -1663,6 +1921,7 @@ bool TargetLowering::SimplifyDemandedBits(
// Add, Sub, and Mul don't demand any bits in positions beyond that
// of the highest bit demanded of them.
SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
+ SDNodeFlags Flags = Op.getNode()->getFlags();
unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
@@ -1671,7 +1930,6 @@ bool TargetLowering::SimplifyDemandedBits(
Depth + 1) ||
// See if the operation should be performed at a smaller bit width.
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
- SDNodeFlags Flags = Op.getNode()->getFlags();
if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
// Disable the nsw and nuw flags. We can no longer guarantee that we
// won't wrap after simplification.
@@ -1684,6 +1942,23 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
}
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Flags.setNoSignedWrap(false);
+ Flags.setNoUnsignedWrap(false);
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp =
+ TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
// If we have a constant operand, we may be able to turn it into -1 if we
// do not demand the high bits. This can make the constant smaller to
// encode, allow more general folding, or match specialized instruction
@@ -1694,10 +1969,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (C && !C->isAllOnesValue() && !C->isOne() &&
(C->getAPIntValue() | HighMask).isAllOnesValue()) {
SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
- // We can't guarantee that the new math op doesn't wrap, so explicitly
- // clear those flags to prevent folding with a potential existing node
- // that has those flags set.
- SDNodeFlags Flags;
+ // Disable the nsw and nuw flags. We can no longer guarantee that we
+ // won't wrap after simplification.
Flags.setNoSignedWrap(false);
Flags.setNoUnsignedWrap(false);
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
@@ -1837,7 +2110,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
// Limit search depth.
- if (Depth >= 6)
+ if (Depth >= SelectionDAG::MaxRecursionDepth)
return false;
SDLoc DL(Op);
@@ -2001,6 +2274,15 @@ bool TargetLowering::SimplifyDemandedVectorElts(
return true;
APInt BaseElts = DemandedElts;
BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
+
+ // If none of the base operand elements are demanded, replace it with undef.
+ if (!BaseElts && !Base.isUndef())
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+ TLO.DAG.getUNDEF(VT),
+ Op.getOperand(1),
+ Op.getOperand(2)));
+
if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO,
Depth + 1))
return true;
@@ -2134,11 +2416,13 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// Update legal shuffle masks based on demanded elements if it won't reduce
// to Identity which can cause premature removal of the shuffle mask.
- if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps &&
- isShuffleMaskLegal(NewMask, VT))
- return TLO.CombineTo(Op,
- TLO.DAG.getVectorShuffle(VT, DL, Op.getOperand(0),
- Op.getOperand(1), NewMask));
+ if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
+ SDValue LegalShuffle =
+ buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
+ NewMask, TLO.DAG);
+ if (LegalShuffle)
+ return TLO.CombineTo(Op, LegalShuffle);
+ }
// Propagate undef/zero elements from LHS/RHS.
for (unsigned i = 0; i != NumElts; ++i) {
@@ -2304,6 +2588,13 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.resetAll();
}
+void TargetLowering::computeKnownBitsForTargetInstr(
+ GISelKnownBits &Analysis, Register R, KnownBits &Known,
+ const APInt &DemandedElts, const MachineRegisterInfo &MRI,
+ unsigned Depth) const {
+ Known.resetAll();
+}
+
void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
@@ -2357,6 +2648,36 @@ bool TargetLowering::SimplifyDemandedBitsForTargetNode(
return false;
}
+SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ SelectionDAG &DAG, unsigned Depth) const {
+ assert(
+ (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
+ " is a target node!");
+ return SDValue();
+}
+
+SDValue
+TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
+ SDValue N1, MutableArrayRef<int> Mask,
+ SelectionDAG &DAG) const {
+ bool LegalMask = isShuffleMaskLegal(Mask, VT);
+ if (!LegalMask) {
+ std::swap(N0, N1);
+ ShuffleVectorSDNode::commuteMask(Mask);
+ LegalMask = isShuffleMaskLegal(Mask, VT);
+ }
+
+ if (!LegalMask)
+ return SDValue();
+
+ return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
+}
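buildLegalVectorShuffle falls back to commuting both the operands and the mask when the original mask is illegal. The index remapping performed by ShuffleVectorSDNode::commuteMask, sketched freestanding for an N-element shuffle (undef entries, encoded as -1, are left untouched):

  #include <vector>
  void commuteShuffleMask(std::vector<int> &Mask) {
    int N = (int)Mask.size();
    for (int &M : Mask)     // indices < N pick from LHS, >= N from RHS
      if (M >= 0)
        M = (M < N) ? M + N : M - N;
  }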
+
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
return nullptr;
}
@@ -2610,6 +2931,77 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
return T2;
}
+// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
+SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
+ EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI, const SDLoc &DL) const {
+ assert(isConstOrConstSplat(N1C) &&
+ isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
+ "Should be a comparison with 0.");
+ assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ "Valid only for [in]equality comparisons.");
+
+ unsigned NewShiftOpcode;
+ SDValue X, C, Y;
+
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Look for '(C l>>/<< Y)'.
+ auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
+ // The shift should be one-use.
+ if (!V.hasOneUse())
+ return false;
+ unsigned OldShiftOpcode = V.getOpcode();
+ switch (OldShiftOpcode) {
+ case ISD::SHL:
+ NewShiftOpcode = ISD::SRL;
+ break;
+ case ISD::SRL:
+ NewShiftOpcode = ISD::SHL;
+ break;
+ default:
+ return false; // must be a logical shift.
+ }
+ // We should be shifting a constant.
+ // FIXME: best to use isConstantOrConstantVector().
+ C = V.getOperand(0);
+ ConstantSDNode *CC =
+ isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
+ if (!CC)
+ return false;
+ Y = V.getOperand(1);
+
+ ConstantSDNode *XC =
+ isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
+ return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
+ };
+
+  // LHS of comparison should be a one-use 'and'.
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
+ return SDValue();
+
+ X = N0.getOperand(0);
+ SDValue Mask = N0.getOperand(1);
+
+ // 'and' is commutative!
+ if (!Match(Mask)) {
+ std::swap(X, Mask);
+ if (!Match(Mask))
+ return SDValue();
+ }
+
+ EVT VT = X.getValueType();
+
+ // Produce:
+ // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
+ SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
+ SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
+ SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
+ return T2;
+}
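The rewrite is exact for logical shifts: both forms test the same bits of X against C, merely realigned, and bits shifted out on one side correspond to zeros on the other. A scalar check of the SHL-to-SRL direction (sketch; assumes Y is below the bit width, as the DAG form also requires):

  #include <cstdint>
  bool beforeFold(uint32_t X, uint32_t C, unsigned Y) {
    return (X & (C << Y)) != 0;
  }
  bool afterFold(uint32_t X, uint32_t C, unsigned Y) {
    return ((X >> Y) & C) != 0; // same predicate, shift moved onto X
  }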
+
/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
/// handle the commuted versions of these patterns.
@@ -2726,9 +3118,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// (ctpop x) u< 2 -> (x & x-1) == 0
// (ctpop x) u> 1 -> (x & x-1) != 0
if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
- SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
- DAG.getConstant(1, dl, CTVT));
- SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
+ SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
+ SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
+ SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
}
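This combine relies on the classic power-of-two test: clearing the lowest set bit of x leaves zero iff x has at most one bit set; the patch merely materializes x - 1 as x + (-1), which is the same value. Scalar form (sketch):

  #include <cstdint>
  bool popcountAtMostOne(uint32_t X) {
    return (X & (X - 1)) == 0; // true iff ctpop(X) u< 2
  }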
@@ -2852,7 +3244,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
APInt bestMask;
unsigned bestWidth = 0, bestOffset = 0;
- if (!Lod->isVolatile() && Lod->isUnindexed()) {
+ if (Lod->isSimple() && Lod->isUnindexed()) {
unsigned origWidth = N0.getValueSizeInBits();
unsigned maskWidth = origWidth;
// We can narrow (e.g.) 16-bit extending loads on 32-bit target to
@@ -3178,6 +3570,14 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
+ if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
+ // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
+ if (C1.isNullValue())
+ if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
+ VT, N0, N1, Cond, DCI, dl))
+ return CC;
+ }
+
// If we have "setcc X, C0", check to see if we can shrink the immediate
// by changing cc.
// TODO: Support this for vectors after legalize ops.
@@ -3203,33 +3603,35 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Back to non-vector simplifications.
// TODO: Can we do these for vector splats?
if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const APInt &C1 = N1C->getAPIntValue();
+ EVT ShValTy = N0.getValueType();
// Fold bit comparisons when we can.
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
- (VT == N0.getValueType() ||
- (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
+ (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
N0.getOpcode() == ISD::AND) {
auto &DL = DAG.getDataLayout();
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
- !DCI.isBeforeLegalize());
+ EVT ShiftTy = getShiftAmountTy(ShValTy, DL, !DCI.isBeforeLegalize());
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
// Perform the xform if the AND RHS is a single bit.
- if (AndRHS->getAPIntValue().isPowerOf2()) {
+ unsigned ShCt = AndRHS->getAPIntValue().logBase2();
+ if (AndRHS->getAPIntValue().isPowerOf2() &&
+ ShCt <= TLI.getShiftAmountThreshold(ShValTy)) {
return DAG.getNode(ISD::TRUNCATE, dl, VT,
- DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
- DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl,
- ShiftTy)));
+ DAG.getNode(ISD::SRL, dl, ShValTy, N0,
+ DAG.getConstant(ShCt, dl, ShiftTy)));
}
} else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
// (X & 8) == 8 --> (X & 8) >> 3
// Perform the xform if C1 is a single bit.
- if (C1.isPowerOf2()) {
+ unsigned ShCt = C1.logBase2();
+ if (C1.isPowerOf2() &&
+ ShCt <= TLI.getShiftAmountThreshold(ShValTy)) {
return DAG.getNode(ISD::TRUNCATE, dl, VT,
- DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
- DAG.getConstant(C1.logBase2(), dl,
- ShiftTy)));
+ DAG.getNode(ISD::SRL, dl, ShValTy, N0,
+ DAG.getConstant(ShCt, dl, ShiftTy)));
}
}
}
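In scalar terms the guarded transform reads as below; the new getShiftAmountThreshold hook lets a target veto the fold when the shift amount would be more expensive than keeping the setcc (sketch):

  #include <cstdint>
  bool viaSetCC(uint32_t X) { return (X & 8) != 0; }        // before
  bool viaShift(uint32_t X) { return ((X & 8) >> 3) != 0; } // after, ShCt = 3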
@@ -3452,15 +3854,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
// Fold remainder of division by a constant.
- if (N0.getOpcode() == ISD::UREM && N0.hasOneUse() &&
- (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
+ N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
// When division is cheap or optimizing for minimum size,
// fall through to DIVREM creation by skipping this fold.
- if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize))
- if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
- return Folded;
+ if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
+ if (N0.getOpcode() == ISD::UREM) {
+ if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
+ return Folded;
+ } else if (N0.getOpcode() == ISD::SREM) {
+ if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
+ return Folded;
+ }
+ }
}
// Fold away ALL boolean setcc's.
@@ -3567,15 +3975,17 @@ TargetLowering::getConstraintType(StringRef Constraint) const {
if (S == 1) {
switch (Constraint[0]) {
default: break;
- case 'r': return C_RegisterClass;
+ case 'r':
+ return C_RegisterClass;
case 'm': // memory
case 'o': // offsetable
case 'V': // not offsetable
return C_Memory;
- case 'i': // Simple Integer or Relocatable Constant
case 'n': // Simple Integer
case 'E': // Floating Point Constant
case 'F': // Floating Point Constant
+ return C_Immediate;
+ case 'i': // Simple Integer or Relocatable Constant
case 's': // Relocatable Constant
case 'p': // Address.
case 'X': // Allow ANY value.
@@ -3950,6 +4360,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
/// Return an integer indicating how general CT is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
switch (CT) {
+ case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
case TargetLowering::C_Unknown:
return 0;
@@ -4069,11 +4480,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
TargetLowering::ConstraintType CType =
TLI.getConstraintType(OpInfo.Codes[i]);
- // If this is an 'other' constraint, see if the operand is valid for it.
- // For example, on X86 we might have an 'rI' constraint. If the operand
- // is an integer in the range [0..31] we want to use I (saving a load
- // of a register), otherwise we must use 'r'.
- if (CType == TargetLowering::C_Other && Op.getNode()) {
+ // If this is an 'other' or 'immediate' constraint, see if the operand is
+ // valid for it. For example, on X86 we might have an 'rI' constraint. If
+ // the operand is an integer in the range [0..31] we want to use I (saving a
+ // load of a register), otherwise we must use 'r'.
+ if ((CType == TargetLowering::C_Other ||
+ CType == TargetLowering::C_Immediate) && Op.getNode()) {
assert(OpInfo.Codes[i].size() == 1 &&
"Unhandled multi-letter 'other' constraint");
std::vector<SDValue> ResultOps;
@@ -4455,6 +4867,34 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
+/// If all values in Values that *don't* match the predicate are the same
+/// 'splat' value, then replace all values with that splat value.
+/// Else, if AlternativeReplacement was provided, replace all values that
+/// *do* match the predicate with the AlternativeReplacement value.
+static void
+turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
+ std::function<bool(SDValue)> Predicate,
+ SDValue AlternativeReplacement = SDValue()) {
+ SDValue Replacement;
+ // Is there a value for which the Predicate does *NOT* match? What is it?
+ auto SplatValue = llvm::find_if_not(Values, Predicate);
+ if (SplatValue != Values.end()) {
+ // Does Values consist only of SplatValue's and values matching Predicate?
+ if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
+ return Value == *SplatValue || Predicate(Value);
+ })) // Then we shall replace values matching predicate with SplatValue.
+ Replacement = *SplatValue;
+ }
+ if (!Replacement) {
+ // Oops, we did not find the "baseline" splat value.
+ if (!AlternativeReplacement)
+ return; // Nothing to do.
+ // Let's replace with provided value then.
+ Replacement = AlternativeReplacement;
+ }
+ std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
+}
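An integer analogue of this helper's behavior, with "is zero" as the predicate (sketch): {0, 5, 5, 0} becomes {5, 5, 5, 5} because every non-matching value agrees on 5, while {0, 5, 7, 0} is only rewritten if an alternative replacement is supplied:

  #include <algorithm>
  #include <vector>
  void splatZeroLanes(std::vector<int> &V, int Alt, bool HaveAlt) {
    auto IsZero = [](int X) { return X == 0; };
    auto It = std::find_if_not(V.begin(), V.end(), IsZero);
    if (It != V.end() && std::all_of(V.begin(), V.end(), [&](int X) {
          return X == *It || X == 0;
        }))
      std::replace_if(V.begin(), V.end(), IsZero, *It); // splat found
    else if (HaveAlt)
      std::replace_if(V.begin(), V.end(), IsZero, Alt); // fallback
  }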
+
/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
/// where the divisor is constant and the comparison target is zero,
/// return a DAG expression that will generate the same comparison result
@@ -4482,77 +4922,409 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
DAGCombinerInfo &DCI, const SDLoc &DL,
SmallVectorImpl<SDNode *> &Created) const {
// fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
- // - D must be constant with D = D0 * 2^K where D0 is odd and D0 != 1
+ // - D must be constant, with D = D0 * 2^K where D0 is odd
// - P is the multiplicative inverse of D0 modulo 2^W
- // - Q = floor((2^W - 1) / D0)
+ // - Q = floor(((2^W) - 1) / D)
// where W is the width of the common type of N and D.
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
"Only applicable for (in)equality comparisons.");
+ SelectionDAG &DAG = DCI.DAG;
+
EVT VT = REMNode.getValueType();
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
// If MUL is unavailable, we cannot proceed in any case.
if (!isOperationLegalOrCustom(ISD::MUL, VT))
return SDValue();
- // TODO: Add non-uniform constant support.
- ConstantSDNode *Divisor = isConstOrConstSplat(REMNode->getOperand(1));
+ // TODO: Could support comparing with non-zero too.
ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
- if (!Divisor || !CompTarget || Divisor->isNullValue() ||
- !CompTarget->isNullValue())
+ if (!CompTarget || !CompTarget->isNullValue())
return SDValue();
- const APInt &D = Divisor->getAPIntValue();
+ bool HadOneDivisor = false;
+ bool AllDivisorsAreOnes = true;
+ bool HadEvenDivisor = false;
+ bool AllDivisorsArePowerOfTwo = true;
+ SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
+
+ auto BuildUREMPattern = [&](ConstantSDNode *C) {
+ // Division by 0 is UB. Leave it to be constant-folded elsewhere.
+ if (C->isNullValue())
+ return false;
+
+ const APInt &D = C->getAPIntValue();
+ // If all divisors are ones, we will prefer to avoid the fold.
+ HadOneDivisor |= D.isOneValue();
+ AllDivisorsAreOnes &= D.isOneValue();
+
+ // Decompose D into D0 * 2^K
+ unsigned K = D.countTrailingZeros();
+ assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
+ APInt D0 = D.lshr(K);
+
+ // D is even if it has trailing zeros.
+ HadEvenDivisor |= (K != 0);
+ // D is a power-of-two if D0 is one.
+ // If all divisors are power-of-two, we will prefer to avoid the fold.
+ AllDivisorsArePowerOfTwo &= D0.isOneValue();
+
+ // P = inv(D0, 2^W)
+ // 2^W requires W + 1 bits, so we have to extend and then truncate.
+ unsigned W = D.getBitWidth();
+ APInt P = D0.zext(W + 1)
+ .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
+ .trunc(W);
+ assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+
+ // Q = floor((2^W - 1) / D)
+ APInt Q = APInt::getAllOnesValue(W).udiv(D);
+
+ assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
+ "We are expecting that K is always less than all-ones for ShSVT");
+
+ // If the divisor is 1 the result can be constant-folded.
+ if (D.isOneValue()) {
+      // Set the P and K amounts to bogus values so we can try to splat them.
+ P = 0;
+ K = -1;
+ assert(Q.isAllOnesValue() &&
+ "Expecting all-ones comparison for one divisor");
+ }
+
+ PAmts.push_back(DAG.getConstant(P, DL, SVT));
+ KAmts.push_back(
+ DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
+ QAmts.push_back(DAG.getConstant(Q, DL, SVT));
+ return true;
+ };
+
+ SDValue N = REMNode.getOperand(0);
+ SDValue D = REMNode.getOperand(1);
- // Decompose D into D0 * 2^K
- unsigned K = D.countTrailingZeros();
- bool DivisorIsEven = (K != 0);
- APInt D0 = D.lshr(K);
+ // Collect the values from each element.
+ if (!ISD::matchUnaryPredicate(D, BuildUREMPattern))
+ return SDValue();
- // The fold is invalid when D0 == 1.
- // This is reachable because visitSetCC happens before visitREM.
- if (D0.isOneValue())
+ // If this is a urem by a one, avoid the fold since it can be constant-folded.
+ if (AllDivisorsAreOnes)
return SDValue();
- // P = inv(D0, 2^W)
- // 2^W requires W + 1 bits, so we have to extend and then truncate.
- unsigned W = D.getBitWidth();
- APInt P = D0.zext(W + 1)
- .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
- .trunc(W);
- assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
- assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+ // If this is a urem by a powers-of-two, avoid the fold since it can be
+ // best implemented as a bit test.
+ if (AllDivisorsArePowerOfTwo)
+ return SDValue();
- // Q = floor((2^W - 1) / D)
- APInt Q = APInt::getAllOnesValue(W).udiv(D);
+ SDValue PVal, KVal, QVal;
+ if (VT.isVector()) {
+ if (HadOneDivisor) {
+ // Try to turn PAmts into a splat, since we don't care about the values
+      // that are currently '0'. If we can't, just keep the '0's.
+ turnVectorIntoSplatVector(PAmts, isNullConstant);
+ // Try to turn KAmts into a splat, since we don't care about the values
+      // that are currently '-1'. If we can't, change them to '0's.
+ turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
+ DAG.getConstant(0, DL, ShSVT));
+ }
- SelectionDAG &DAG = DCI.DAG;
+ PVal = DAG.getBuildVector(VT, DL, PAmts);
+ KVal = DAG.getBuildVector(ShVT, DL, KAmts);
+ QVal = DAG.getBuildVector(VT, DL, QAmts);
+ } else {
+ PVal = PAmts[0];
+ KVal = KAmts[0];
+ QVal = QAmts[0];
+ }
- SDValue PVal = DAG.getConstant(P, DL, VT);
- SDValue QVal = DAG.getConstant(Q, DL, VT);
// (mul N, P)
- SDValue Op1 = DAG.getNode(ISD::MUL, DL, VT, REMNode->getOperand(0), PVal);
- Created.push_back(Op1.getNode());
+ SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
+ Created.push_back(Op0.getNode());
- // Rotate right only if D was even.
- if (DivisorIsEven) {
+ // Rotate right only if any divisor was even. We avoid rotates for all-odd
+ // divisors as a performance improvement, since rotating by 0 is a no-op.
+ if (HadEvenDivisor) {
// We need ROTR to do this.
if (!isOperationLegalOrCustom(ISD::ROTR, VT))
return SDValue();
- SDValue ShAmt =
- DAG.getConstant(K, DL, getShiftAmountTy(VT, DAG.getDataLayout()));
SDNodeFlags Flags;
Flags.setExact(true);
// UREM: (rotr (mul N, P), K)
- Op1 = DAG.getNode(ISD::ROTR, DL, VT, Op1, ShAmt, Flags);
- Created.push_back(Op1.getNode());
+ Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
+ Created.push_back(Op0.getNode());
}
// UREM: (setule/setugt (rotr (mul N, P), K), Q)
- return DAG.getSetCC(DL, SETCCVT, Op1, QVal,
+ return DAG.getSetCC(DL, SETCCVT, Op0, QVal,
((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
}
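A worked scalar instance of the pattern just built, for N u% 6 == 0 at W = 32 (sketch; constants follow the comments above: D0 = 3, K = 1, P = inv(3, 2^32) = 0xAAAAAAAB, Q = floor((2^32 - 1) / 6) = 0x2AAAAAAA):

  #include <cstdint>
  bool isMultipleOf6(uint32_t N) {
    uint32_t M = N * 0xAAAAAAABu;      // (mul N, P)
    uint32_t R = (M >> 1) | (M << 31); // (rotr M, K) with K = 1
    return R <= 0x2AAAAAAAu;           // (setule R, Q) <=> N u% 6 == 0
  }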
+/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
+/// where the divisor is constant and the comparison target is zero,
+/// return a DAG expression that will generate the same comparison result
+/// using only multiplications, additions and shifts/rotations.
+/// Ref: "Hacker's Delight" 10-17.
+SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
+ SDValue CompTargetNode,
+ ISD::CondCode Cond,
+ DAGCombinerInfo &DCI,
+ const SDLoc &DL) const {
+ SmallVector<SDNode *, 7> Built;
+ if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
+ DCI, DL, Built)) {
+ assert(Built.size() <= 7 && "Max size prediction failed.");
+ for (SDNode *N : Built)
+ DCI.AddToWorklist(N);
+ return Folded;
+ }
+
+ return SDValue();
+}
+
+SDValue
+TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
+ SDValue CompTargetNode, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI, const SDLoc &DL,
+ SmallVectorImpl<SDNode *> &Created) const {
+ // Fold:
+ // (seteq/ne (srem N, D), 0)
+ // To:
+ // (setule/ugt (rotr (add (mul N, P), A), K), Q)
+ //
+ // - D must be constant, with D = D0 * 2^K where D0 is odd
+ // - P is the multiplicative inverse of D0 modulo 2^W
+  // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^K)))
+ // - Q = floor((2 * A) / (2^K))
+ // where W is the width of the common type of N and D.
+ assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ "Only applicable for (in)equality comparisons.");
+
+ SelectionDAG &DAG = DCI.DAG;
+
+ EVT VT = REMNode.getValueType();
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+
+ // If MUL is unavailable, we cannot proceed in any case.
+ if (!isOperationLegalOrCustom(ISD::MUL, VT))
+ return SDValue();
+
+ // TODO: Could support comparing with non-zero too.
+ ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
+ if (!CompTarget || !CompTarget->isNullValue())
+ return SDValue();
+
+ bool HadIntMinDivisor = false;
+ bool HadOneDivisor = false;
+ bool AllDivisorsAreOnes = true;
+ bool HadEvenDivisor = false;
+ bool NeedToApplyOffset = false;
+ bool AllDivisorsArePowerOfTwo = true;
+ SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
+
+ auto BuildSREMPattern = [&](ConstantSDNode *C) {
+ // Division by 0 is UB. Leave it to be constant-folded elsewhere.
+ if (C->isNullValue())
+ return false;
+
+ // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
+
+ // WARNING: this fold is only valid for positive divisors!
+ APInt D = C->getAPIntValue();
+ if (D.isNegative())
+ D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
+
+ HadIntMinDivisor |= D.isMinSignedValue();
+
+ // If all divisors are ones, we will prefer to avoid the fold.
+ HadOneDivisor |= D.isOneValue();
+ AllDivisorsAreOnes &= D.isOneValue();
+
+ // Decompose D into D0 * 2^K
+ unsigned K = D.countTrailingZeros();
+ assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
+ APInt D0 = D.lshr(K);
+
+ if (!D.isMinSignedValue()) {
+ // D is even if it has trailing zeros; unless it's INT_MIN, in which case
+      // we don't care about this lane in this fold; we'll special-handle it.
+ HadEvenDivisor |= (K != 0);
+ }
+
+ // D is a power-of-two if D0 is one. This includes INT_MIN.
+ // If all divisors are power-of-two, we will prefer to avoid the fold.
+ AllDivisorsArePowerOfTwo &= D0.isOneValue();
+
+ // P = inv(D0, 2^W)
+ // 2^W requires W + 1 bits, so we have to extend and then truncate.
+ unsigned W = D.getBitWidth();
+ APInt P = D0.zext(W + 1)
+ .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
+ .trunc(W);
+ assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+
+ // A = floor((2^(W - 1) - 1) / D0) & -2^K
+ APInt A = APInt::getSignedMaxValue(W).udiv(D0);
+ A.clearLowBits(K);
+
+ if (!D.isMinSignedValue()) {
+      // If the divisor is INT_MIN, we don't care about this lane in this
+      // fold; we'll special-handle it.
+ NeedToApplyOffset |= A != 0;
+ }
+
+ // Q = floor((2 * A) / (2^K))
+ APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
+
+ assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
+ "We are expecting that A is always less than all-ones for SVT");
+ assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
+ "We are expecting that K is always less than all-ones for ShSVT");
+
+ // If the divisor is 1 the result can be constant-folded. Likewise, we
+ // don't care about INT_MIN lanes, those can be set to undef if appropriate.
+    // don't care about INT_MIN lanes; those can be set to undef if appropriate.
+      // Set P, A and K to bogus values so we can try to splat them.
+ P = 0;
+ A = -1;
+ K = -1;
+
+ // x ?% 1 == 0 <--> true <--> x u<= -1
+ Q = -1;
+ }
+
+ PAmts.push_back(DAG.getConstant(P, DL, SVT));
+ AAmts.push_back(DAG.getConstant(A, DL, SVT));
+ KAmts.push_back(
+ DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
+ QAmts.push_back(DAG.getConstant(Q, DL, SVT));
+ return true;
+ };
+
+ SDValue N = REMNode.getOperand(0);
+ SDValue D = REMNode.getOperand(1);
+
+ // Collect the values from each element.
+ if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
+ return SDValue();
+
+ // If this is a srem by a one, avoid the fold since it can be constant-folded.
+ if (AllDivisorsAreOnes)
+ return SDValue();
+
+ // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
+ // since it can be best implemented as a bit test.
+ if (AllDivisorsArePowerOfTwo)
+ return SDValue();
+
+ SDValue PVal, AVal, KVal, QVal;
+ if (VT.isVector()) {
+ if (HadOneDivisor) {
+ // Try to turn PAmts into a splat, since we don't care about the values
+      // that are currently '0'. If we can't, just keep the '0's.
+ turnVectorIntoSplatVector(PAmts, isNullConstant);
+ // Try to turn AAmts into a splat, since we don't care about the
+      // values that are currently '-1'. If we can't, change them to '0's.
+ turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
+ DAG.getConstant(0, DL, SVT));
+ // Try to turn KAmts into a splat, since we don't care about the values
+      // that are currently '-1'. If we can't, change them to '0's.
+ turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
+ DAG.getConstant(0, DL, ShSVT));
+ }
+
+ PVal = DAG.getBuildVector(VT, DL, PAmts);
+ AVal = DAG.getBuildVector(VT, DL, AAmts);
+ KVal = DAG.getBuildVector(ShVT, DL, KAmts);
+ QVal = DAG.getBuildVector(VT, DL, QAmts);
+ } else {
+ PVal = PAmts[0];
+ AVal = AAmts[0];
+ KVal = KAmts[0];
+ QVal = QAmts[0];
+ }
+
+ // (mul N, P)
+ SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
+ Created.push_back(Op0.getNode());
+
+ if (NeedToApplyOffset) {
+ // We need ADD to do this.
+ if (!isOperationLegalOrCustom(ISD::ADD, VT))
+ return SDValue();
+
+ // (add (mul N, P), A)
+ Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
+ Created.push_back(Op0.getNode());
+ }
+
+ // Rotate right only if any divisor was even. We avoid rotates for all-odd
+ // divisors as a performance improvement, since rotating by 0 is a no-op.
+ if (HadEvenDivisor) {
+ // We need ROTR to do this.
+ if (!isOperationLegalOrCustom(ISD::ROTR, VT))
+ return SDValue();
+ SDNodeFlags Flags;
+ Flags.setExact(true);
+ // SREM: (rotr (add (mul N, P), A), K)
+ Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
+ Created.push_back(Op0.getNode());
+ }
+
+ // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
+ SDValue Fold =
+ DAG.getSetCC(DL, SETCCVT, Op0, QVal,
+ ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
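+ // e.g. with the i32 D = 12 example above, this produces
+ // (x s% 12) == 0 <--> (rotr (add (mul x, 0xAAAAAAAB), 0x2AAAAAA8), 2)
+ // u<= 0x15555554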
+
+ // If we didn't have lanes with INT_MIN divisor, then we're done.
+ if (!HadIntMinDivisor)
+ return Fold;
+
+ // That fold is only valid for positive divisors, which means it is invalid
+ // for INT_MIN divisors. So if we have any such lanes, we must fix up the
+ // results for them.
+ assert(VT.isVector() && "Can/should only get here for vectors.");
+
+ if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
+ !isOperationLegalOrCustom(ISD::AND, VT) ||
+ !isOperationLegalOrCustom(Cond, VT) ||
+ !isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return SDValue();
+
+ Created.push_back(Fold.getNode());
+
+ SDValue IntMin = DAG.getConstant(
+ APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
+ SDValue IntMax = DAG.getConstant(
+ APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
+ SDValue Zero =
+ DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);
+
+ // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
+ SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
+ Created.push_back(DivisorIsIntMin.getNode());
+
+ // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
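+ // (N s% INT_MIN is zero only for N == 0 or N == INT_MIN, i.e. exactly
+ // when the low W - 1 bits of N are all zero.)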
+ SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
+ Created.push_back(Masked.getNode());
+ SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
+ Created.push_back(MaskedIsZero.getNode());
+
+ // To produce the final result we need to blend two vectors: 'Fold' and
+ // 'MaskedIsZero'. If the divisor for a channel was *NOT* INT_MIN, we pick
+ // from 'Fold'; otherwise we pick from 'MaskedIsZero'. Since
+ // 'DivisorIsIntMin' is constant-folded, the select can get lowered to a
+ // shuffle with a constant mask.
+ SDValue Blended =
+ DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold);
+
+ return Blended;
+}
+
bool TargetLowering::
verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
if (!isa<ConstantSDNode>(Op.getOperand(0))) {
@@ -4564,6 +5336,246 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
return false;
}
+char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations, bool ForCodeSize,
+ unsigned Depth) const {
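+ // Return-value convention, as used below: 0 means Op is not cheaply
+ // negatible, 2 means negating Op removes an existing fneg outright, and 1
+ // covers the remaining profitable cases.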
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG)
+ return 2;
+
+ // Don't allow anything with multiple uses unless we know it is free.
+ EVT VT = Op.getValueType();
+ const SDNodeFlags Flags = Op->getFlags();
+ const TargetOptions &Options = DAG.getTarget().Options;
+ if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND &&
+ isFPExtFree(VT, Op.getOperand(0).getValueType())))
+ return 0;
+
+ // Don't recurse exponentially.
+ if (Depth > SelectionDAG::MaxRecursionDepth)
+ return 0;
+
+ switch (Op.getOpcode()) {
+ case ISD::ConstantFP: {
+ if (!LegalOperations)
+ return 1;
+
+ // Don't invert constant FP values after legalization unless the target says
+ // the negated constant is legal.
+ return isOperationLegal(ISD::ConstantFP, VT) ||
+ isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
+ ForCodeSize);
+ }
+ case ISD::BUILD_VECTOR: {
+ // Only permit BUILD_VECTOR of constants.
+ if (llvm::any_of(Op->op_values(), [&](SDValue N) {
+ return !N.isUndef() && !isa<ConstantFPSDNode>(N);
+ }))
+ return 0;
+ if (!LegalOperations)
+ return 1;
+ if (isOperationLegal(ISD::ConstantFP, VT) &&
+ isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return 1;
+ return llvm::all_of(Op->op_values(), [&](SDValue N) {
+ return N.isUndef() ||
+ isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
+ ForCodeSize);
+ });
+ }
+ case ISD::FADD:
+ if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ return 0;
+
+ // After operation legalization, it might not be legal to create new FSUBs.
+ if (LegalOperations && !isOperationLegalOrCustom(ISD::FSUB, VT))
+ return 0;
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+ ForCodeSize, Depth + 1))
+ return V;
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ return 0;
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return 1;
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
+ if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+ ForCodeSize, Depth + 1))
+ return V;
+
+ // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
+ if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
+ if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
+ return 0;
+
+ return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+
+ case ISD::FMA:
+ case ISD::FMAD: {
+ if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ return 0;
+
+ // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
+ // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
+ char V2 = isNegatibleForFree(Op.getOperand(2), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ if (!V2)
+ return 0;
+
+ // One of Op0/Op1 must be cheaply negatible; then select the cheaper one.
+ char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ char V01 = std::max(V0, V1);
+ return V01 ? std::max(V01, V2) : 0;
+ }
+
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FSIN:
+ return isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ }
+
+ return 0;
+}
+
+SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations,
+ bool ForCodeSize,
+ unsigned Depth) const {
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG)
+ return Op.getOperand(0);
+
+ assert(Depth <= SelectionDAG::MaxRecursionDepth &&
+ "getNegatedExpression doesn't match isNegatibleForFree");
+ const SDNodeFlags Flags = Op->getFlags();
+
+ switch (Op.getOpcode()) {
+ case ISD::ConstantFP: {
+ APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
+ V.changeSign();
+ return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
+ }
+ case ISD::BUILD_VECTOR: {
+ SmallVector<SDValue, 4> Ops;
+ for (SDValue C : Op->op_values()) {
+ if (C.isUndef()) {
+ Ops.push_back(C);
+ continue;
+ }
+ APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
+ V.changeSign();
+ Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
+ }
+ return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
+ }
+ case ISD::FADD:
+ assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
+ Flags.hasNoSignedZeros()) &&
+ "Expected NSZ fp-flag");
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
+ Depth + 1))
+ return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
+ getNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1),
+ Op.getOperand(1), Flags);
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
+ getNegatedExpression(Op.getOperand(1), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1),
+ Op.getOperand(0), Flags);
+ case ISD::FSUB:
+ // fold (fneg (fsub 0, B)) -> B
+ if (ConstantFPSDNode *N0CFP =
+ isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
+ if (N0CFP->isZero())
+ return Op.getOperand(1);
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(0), Flags);
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
+ if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
+ Depth + 1))
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+ getNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1),
+ Op.getOperand(1), Flags);
+
+ // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
+ return DAG.getNode(
+ Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0),
+ getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1),
+ Flags);
+
+ case ISD::FMA:
+ case ISD::FMAD: {
+ assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
+ Flags.hasNoSignedZeros()) &&
+ "Expected NSZ fp-flag");
+
+ SDValue Neg2 = getNegatedExpression(Op.getOperand(2), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+
+ char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ if (V0 >= V1) {
+ // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
+ SDValue Neg0 = getNegatedExpression(
+ Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1);
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0,
+ Op.getOperand(1), Neg2, Flags);
+ }
+
+ // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
+ SDValue Neg1 = getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+ Op.getOperand(0), Neg1, Neg2, Flags);
+ }
+
+ case ISD::FP_EXTEND:
+ case ISD::FSIN:
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+ getNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1));
+ case ISD::FP_ROUND:
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
+ getNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1),
+ Op.getOperand(1));
+ }
+
+ llvm_unreachable("Unknown code");
+}
+
//===----------------------------------------------------------------------===//
// Legalization Utilities
//===----------------------------------------------------------------------===//
@@ -4862,7 +5874,8 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
- SDValue Src = Node->getOperand(0);
+ unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+ SDValue Src = Node->getOperand(OpNo);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
SDLoc dl(SDValue(Node, 0));
@@ -4871,6 +5884,13 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
if (SrcVT != MVT::f32 || DstVT != MVT::i64)
return false;
+ if (Node->isStrictFPOpcode())
+ // When a NaN is converted to an integer, a trap is allowed. We can't
+ // use this expansion here because it would eliminate that trap. Other
+ // traps are also allowed and cannot be eliminated. See
+ // IEEE 754-2008 sec 5.8.
+ return false;
+
// Expand f32 -> i64 conversion
// This algorithm comes from compiler-rt's implementation of fixsfdi:
// https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
@@ -4924,9 +5944,11 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
}
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
+ SDValue &Chain,
SelectionDAG &DAG) const {
SDLoc dl(SDValue(Node, 0));
- SDValue Src = Node->getOperand(0);
+ unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+ SDValue Src = Node->getOperand(OpNo);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
@@ -4934,7 +5956,9 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
// Only expand vector types if we have the appropriate vector bit operations.
- if (DstVT.isVector() && (!isOperationLegalOrCustom(ISD::FP_TO_SINT, DstVT) ||
+ unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
+ ISD::FP_TO_SINT;
+ if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
return false;
@@ -4946,14 +5970,21 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
if (APFloat::opOverflow &
APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
- Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
+ if (Node->isStrictFPOpcode()) {
+ Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
+ { Node->getOperand(0), Src });
+ Chain = Result.getValue(1);
+ } else
+ Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
return true;
}
SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
- bool Strict = shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
+ bool Strict = Node->isStrictFPOpcode() ||
+ shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
+
if (Strict) {
// Expand based on maximum range of FP_TO_SINT, if the value exceeds the
// signmask then offset (the result of which should be fully representable).
@@ -4963,12 +5994,23 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
// Result = fp_to_sint(Val) ^ Ofs
// TODO: Should any fast-math-flags be set for the FSUB?
- SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src,
- DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
+ SDValue SrcBiased;
+ if (Node->isStrictFPOpcode())
+ SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
+ { Node->getOperand(0), Src, Cst });
+ else
+ SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst);
+ SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased);
SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT),
DAG.getConstant(SignMask, dl, DstVT));
- Result = DAG.getNode(ISD::XOR, dl, DstVT,
- DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val), Ofs);
+ SDValue SInt;
+ if (Node->isStrictFPOpcode()) {
+ SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
+ { SrcBiased.getValue(1), Val });
+ Chain = SInt.getValue(1);
+ } else
+ SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
+ Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs);
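+ // e.g. for f32 -> i32 with Src = 3e9: Src >= 2^31, so this computes
+ // fp_to_sint(3e9 - 2^31) ^ 0x80000000 = 852516352 ^ 0x80000000
+ // = 3000000000.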
} else {
// Expand based on maximum range of FP_TO_SINT:
// True = fp_to_sint(Src)
@@ -5918,7 +6960,8 @@ SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
assert((Node->getOpcode() == ISD::SMULFIX ||
Node->getOpcode() == ISD::UMULFIX ||
- Node->getOpcode() == ISD::SMULFIXSAT) &&
+ Node->getOpcode() == ISD::SMULFIXSAT ||
+ Node->getOpcode() == ISD::UMULFIXSAT) &&
"Expected a fixed point multiplication opcode");
SDLoc dl(Node);
@@ -5926,15 +6969,19 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
SDValue RHS = Node->getOperand(1);
EVT VT = LHS.getValueType();
unsigned Scale = Node->getConstantOperandVal(2);
- bool Saturating = Node->getOpcode() == ISD::SMULFIXSAT;
+ bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
+ Node->getOpcode() == ISD::UMULFIXSAT);
+ bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
+ Node->getOpcode() == ISD::SMULFIXSAT);
EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
unsigned VTSize = VT.getScalarSizeInBits();
if (!Scale) {
// [us]mul.fix(a, b, 0) -> mul(a, b)
- if (!Saturating && isOperationLegalOrCustom(ISD::MUL, VT)) {
- return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
- } else if (Saturating && isOperationLegalOrCustom(ISD::SMULO, VT)) {
+ if (!Saturating) {
+ if (isOperationLegalOrCustom(ISD::MUL, VT))
+ return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
SDValue Result =
DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
SDValue Product = Result.getValue(0);
@@ -5948,11 +6995,18 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
return DAG.getSelect(dl, VT, Overflow, Result, Product);
+ } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
+ SDValue Result =
+ DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+ SDValue Product = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+
+ APInt MaxVal = APInt::getMaxValue(VTSize);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
}
}
- bool Signed =
- Node->getOpcode() == ISD::SMULFIX || Node->getOpcode() == ISD::SMULFIXSAT;
assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
"Expected scale to be less than the number of bits if signed or at "
"most the number of bits if unsigned.");
@@ -5978,7 +7032,8 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
if (Scale == VTSize)
// Result is just the top half since we'd be shifting by the width of the
- // operand.
+ // operand. Overflow is impossible, so this works for both UMULFIX and
+ // UMULFIXSAT.
return Hi;
// The result will need to be shifted right by the scale since both operands
@@ -5990,20 +7045,55 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
if (!Saturating)
return Result;
- unsigned OverflowBits = VTSize - Scale + 1; // +1 for the sign
- SDValue HiMask =
- DAG.getConstant(APInt::getHighBitsSet(VTSize, OverflowBits), dl, VT);
- SDValue LoMask = DAG.getConstant(
- APInt::getLowBitsSet(VTSize, VTSize - OverflowBits), dl, VT);
- APInt MaxVal = APInt::getSignedMaxValue(VTSize);
- APInt MinVal = APInt::getSignedMinValue(VTSize);
-
- Result = DAG.getSelectCC(dl, Hi, LoMask,
- DAG.getConstant(MaxVal, dl, VT), Result,
- ISD::SETGT);
- return DAG.getSelectCC(dl, Hi, HiMask,
- DAG.getConstant(MinVal, dl, VT), Result,
- ISD::SETLT);
+ if (!Signed) {
+ // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
+ // widened multiplication) aren't all zeroes.
+
+ // Saturate to max if ((Hi >> Scale) != 0),
+ // which is the same as if (Hi > ((1 << Scale) - 1))
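+ // e.g. for i8 with Scale = 4: 0x80 (8.0) * 0x40 (4.0) gives Hi = 0x20,
+ // which is u> 0x0F; the true result 32.0 does not fit in 4.4 format, so
+ // we return the saturated value 0xFF (15.9375).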
+ APInt MaxVal = APInt::getMaxValue(VTSize);
+ SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
+ dl, VT);
+ Result = DAG.getSelectCC(dl, Hi, LowMask,
+ DAG.getConstant(MaxVal, dl, VT), Result,
+ ISD::SETUGT);
+
+ return Result;
+ }
+
+ // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
+ // widened multiplication) aren't all ones or all zeroes.
+
+ SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
+ SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
+
+ if (Scale == 0) {
+ SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
+ DAG.getConstant(VTSize - 1, dl, ShiftTy));
+ SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
+ // Saturate to SatMin if the wide product is negative, and to SatMax if
+ // the wide product is positive ...
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
+ ISD::SETLT);
+ // ... but only if we overflowed.
+ return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
+ }
+
+ // We handled Scale == 0 above, so all the bits to examine are in Hi.
+
+ // Saturate to max if ((Hi >> (Scale - 1)) > 0),
+ // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
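+ // e.g. for i8 with Scale = 4: 0x40 (4.0) * 0x40 (4.0) gives Hi = 0x10,
+ // which is s> 0x07; the true result 16.0 does not fit in s4.4 format, so
+ // we return the saturated value 0x7F (7.9375).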
+ SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
+ dl, VT);
+ Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
+ // Saturate to min if ((Hi >> (Scale - 1)) < -1),
+ // which is the same as if (Hi < (-1 << (Scale - 1)))
+ SDValue HighMask =
+ DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
+ dl, VT);
+ Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
+ return Result;
}
void TargetLowering::expandUADDSUBO(
@@ -6060,24 +7150,19 @@ void TargetLowering::expandSADDSUBO(
SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
- // LHSSign -> LHS >= 0
- // RHSSign -> RHS >= 0
- // SumSign -> Result >= 0
- //
- // Add:
- // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
- // Sub:
- // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
- SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
- SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
- SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
- IsAdd ? ISD::SETEQ : ISD::SETNE);
-
- SDValue SumSign = DAG.getSetCC(dl, OType, Result, Zero, ISD::SETGE);
- SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
-
- SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
- Overflow = DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType);
+ // For an addition, the result should be less than one of the operands (LHS)
+ // if and only if the other operand (RHS) is negative, otherwise there will
+ // be overflow.
+ // For a subtraction, the result should be less than one of the operands
+ // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
+ // otherwise there will be overflow.
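+ // e.g. for an i8 addition with LHS = 100 and RHS = 50, Result wraps to
+ // -106; RHS is not negative but Result < LHS, so the XOR below signals
+ // overflow.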
+ SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
+ SDValue ConditionRHS =
+ DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
+
+ Overflow = DAG.getBoolExtOrTrunc(
+ DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
+ ResultType, ResultType);
}
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
@@ -6176,20 +7261,19 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
// being a legal type for the architecture and thus has to be split to
// two arguments.
SDValue Ret;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(isSigned);
+ CallOptions.setIsPostTypeLegalization(true);
if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
// Halves of WideVT are packed into registers in different order
// depending on platform endianness. This is usually handled by
// the C calling convention, but we can't defer to it in
// the legalizer.
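+ // e.g. when an illegal i64 is split into two i32 registers, the low half
+ // of each operand is passed before its high half here.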
SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
- Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl,
- /* doesNotReturn */ false, /* isReturnValueUsed */ true,
- /* isPostTypeLegalization */ true).first;
+ Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
} else {
SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
- Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl,
- /* doesNotReturn */ false, /* isReturnValueUsed */ true,
- /* isPostTypeLegalization */ true).first;
+ Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
}
assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
"Ret value is a collection of constituent nodes holding result.");