Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp                 | 1349
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/FastISel.cpp                    |    8
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp        |   11
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp                |   26
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp                 |  147
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp          |   68
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp        |  338
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp               |    5
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h                 |   17
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp        |    6
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp           |  163
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp         |  156
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp             |   16
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp           |   34
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp          |   11
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp             |    4
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp                |  897
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp |    2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp         |  673
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h           |   81
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp          |    9
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp            |  348
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp          |   24
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp              |  487
24 files changed, 3453 insertions(+), 1427 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index de909cc10795..f35f663d6ba1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -149,7 +149,7 @@ namespace {
const TargetLowering &TLI;
const SelectionDAGTargetInfo *STI;
CombineLevel Level = BeforeLegalizeTypes;
- CodeGenOpt::Level OptLevel;
+ CodeGenOptLevel OptLevel;
bool LegalDAG = false;
bool LegalOperations = false;
bool LegalTypes = false;
@@ -242,7 +242,7 @@ namespace {
SDValue visit(SDNode *N);
public:
- DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
+ DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOptLevel OL)
: DAG(D), TLI(D.getTargetLoweringInfo()),
STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
ForCodeSize = DAG.shouldOptForSize();
@@ -430,6 +430,8 @@ namespace {
SDValue visitSADDO_CARRY(SDNode *N);
SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
SDNode *N);
+ SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
+ SDNode *N);
SDValue visitSUBE(SDNode *N);
SDValue visitUSUBO_CARRY(SDNode *N);
SDValue visitSSUBO_CARRY(SDNode *N);
@@ -493,6 +495,7 @@ namespace {
SDValue visitFSUB(SDNode *N);
SDValue visitFMUL(SDNode *N);
template <class MatchContextClass> SDValue visitFMA(SDNode *N);
+ SDValue visitFMAD(SDNode *N);
SDValue visitFDIV(SDNode *N);
SDValue visitFREM(SDNode *N);
SDValue visitFSQRT(SDNode *N);
@@ -502,6 +505,7 @@ namespace {
SDValue visitUINT_TO_FP(SDNode *N);
SDValue visitFP_TO_SINT(SDNode *N);
SDValue visitFP_TO_UINT(SDNode *N);
+ SDValue visitXRINT(SDNode *N);
SDValue visitFP_ROUND(SDNode *N);
SDValue visitFP_EXTEND(SDNode *N);
SDValue visitFNEG(SDNode *N);
@@ -537,6 +541,8 @@ namespace {
SDValue visitMSCATTER(SDNode *N);
SDValue visitVPGATHER(SDNode *N);
SDValue visitVPSCATTER(SDNode *N);
+ SDValue visitVP_STRIDED_LOAD(SDNode *N);
+ SDValue visitVP_STRIDED_STORE(SDNode *N);
SDValue visitFP_TO_FP16(SDNode *N);
SDValue visitFP16_TO_FP(SDNode *N);
SDValue visitFP_TO_BF16(SDNode *N);
@@ -561,7 +567,7 @@ namespace {
SDValue N1, SDNodeFlags Flags);
SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1, SDNodeFlags Flags);
- SDValue reassociateReduction(unsigned ResOpc, unsigned Opc, const SDLoc &DL,
+ SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
EVT VT, SDValue N0, SDValue N1,
SDNodeFlags Flags = SDNodeFlags());
@@ -607,6 +613,7 @@ namespace {
SDValue CombineExtLoad(SDNode *N);
SDValue CombineZExtLogicopShiftLoad(SDNode *N);
SDValue combineRepeatedFPDivisors(SDNode *N);
+ SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
@@ -616,7 +623,10 @@ namespace {
SDValue BuildUDIV(SDNode *N);
SDValue BuildSREMPow2(SDNode *N);
SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
- SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
+ SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
+ bool KnownNeverZero = false,
+ bool InexpensiveOnly = false,
+ std::optional<EVT> OutVT = std::nullopt);
SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
@@ -698,6 +708,11 @@ namespace {
case ISD::Constant:
case ISD::ConstantFP:
return StoreSource::Constant;
+ case ISD::BUILD_VECTOR:
+ if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
+ return StoreSource::Constant;
+ return StoreSource::Unknown;
case ISD::EXTRACT_VECTOR_ELT:
case ISD::EXTRACT_SUBVECTOR:
return StoreSource::Extract;
@@ -1329,6 +1344,30 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
}
}
+
+  // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
+  // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
+  // predicate, or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
+  // comparisons with the same predicate. This enables optimizations such as:
+  // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
+  // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
+ if (Opc == ISD::AND || Opc == ISD::OR) {
+ if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
+ N01->getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
+ ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
+ ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
+ if (CC1 == CC00 && CC1 != CC01) {
+ SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
+ return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
+ }
+ if (CC1 == CC01 && CC1 != CC00) {
+ SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
+ return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
+ }
+ }
+ }
}
return SDValue();
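A minimal standalone C++ sketch (my illustration, not part of the patch) of the integer identity this reassociation ultimately enables:

#include <algorithm>
#include <cassert>

int main() {
  // Exhaustively check the OR/min and AND/max duals on a small range;
  // the same identities hold for any integers and any strict order.
  for (int a = -2; a <= 2; ++a)
    for (int b = -2; b <= 2; ++b)
      for (int c = -2; c <= 2; ++c) {
        assert(((a < c) || (b < c)) == (std::min(a, b) < c));
        assert(((a < c) && (b < c)) == (std::max(a, b) < c));
      }
  return 0;
}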
@@ -1873,6 +1912,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
}
SDValue DAGCombiner::visit(SDNode *N) {
+ // clang-format off
switch (N->getOpcode()) {
default: break;
case ISD::TokenFactor: return visitTokenFactor(N);
@@ -1963,6 +2003,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FSUB: return visitFSUB(N);
case ISD::FMUL: return visitFMUL(N);
case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
+ case ISD::FMAD: return visitFMAD(N);
case ISD::FDIV: return visitFDIV(N);
case ISD::FREM: return visitFREM(N);
case ISD::FSQRT: return visitFSQRT(N);
@@ -1972,6 +2013,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
+ case ISD::LRINT:
+ case ISD::LLRINT: return visitXRINT(N);
case ISD::FP_ROUND: return visitFP_ROUND(N);
case ISD::FP_EXTEND: return visitFP_EXTEND(N);
case ISD::FNEG: return visitFNEG(N);
@@ -2026,6 +2069,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
#include "llvm/IR/VPIntrinsics.def"
return visitVPOp(N);
}
+ // clang-format on
return SDValue();
}
@@ -2124,7 +2168,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
}
// Don't simplify token factors if optnone.
- if (OptLevel == CodeGenOpt::None)
+ if (OptLevel == CodeGenOptLevel::None)
return SDValue();
// Don't simplify the token factor if the node itself has too many operands.
@@ -2649,15 +2693,6 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-static bool isADDLike(SDValue V, const SelectionDAG &DAG) {
- unsigned Opcode = V.getOpcode();
- if (Opcode == ISD::OR)
- return DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1));
- if (Opcode == ISD::XOR)
- return isMinSignedConstant(V.getOperand(1));
- return false;
-}
-
static bool
areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
@@ -2739,7 +2774,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// iff (or x, c0) is equivalent to (add x, c0).
// Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
// iff (xor x, c0) is equivalent to (add x, c0).
- if (isADDLike(N0, DAG)) {
+ if (DAG.isADDLike(N0)) {
SDValue N01 = N0.getOperand(1);
if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
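DAG.isADDLike covers the two patterns the removed static helper handled: an OR whose operands share no set bits, and an XOR with the minimum signed constant. A self-contained C++ check of both value identities (my sketch, not from the patch):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0x00F0ABCDu;
  uint32_t c = 0x0F000000u;           // shares no set bits with x
  assert((x & c) == 0);
  assert((x | c) == x + c);           // disjoint OR behaves like ADD
  // XOR with the minimum signed value only flips the sign bit; adding it
  // does the same because the carry out of bit 31 is discarded.
  assert((x ^ 0x80000000u) == x + 0x80000000u);
  return 0;
}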
@@ -2760,7 +2795,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// Do this optimization only when adding c does not introduce instructions
// for adding carries.
auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
- if (isADDLike(N0, DAG) && N0.hasOneUse() &&
+ if (DAG.isADDLike(N0) && N0.hasOneUse() &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
// If N0's type does not split or is a sign mask, it does not introduce
// add carry.
@@ -3011,7 +3046,7 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) {
return N0;
// If it cannot overflow, transform into an add.
- if (DAG.computeOverflowForAdd(IsSigned, N0, N1) == SelectionDAG::OFK_Never)
+ if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
return SDValue();
@@ -3281,11 +3316,16 @@ SDValue DAGCombiner::visitADDO(SDNode *N) {
return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
// If it cannot overflow, transform into an add.
- if (DAG.computeOverflowForAdd(IsSigned, N0, N1) == SelectionDAG::OFK_Never)
+ if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
DAG.getConstant(0, DL, CarryVT));
- if (!IsSigned) {
+ if (IsSigned) {
+ // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
+ if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
+ return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
+ DAG.getConstant(0, DL, VT), N0.getOperand(0));
+ } else {
// fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
@@ -3617,6 +3657,18 @@ SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
return SDValue();
}
+SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
+ SDValue CarryIn, SDNode *N) {
+ // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
+ if (isBitwiseNot(N0)) {
+ if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
+ return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
+ N0.getOperand(0), NotC);
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -3636,6 +3688,12 @@ SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
}
+ if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
+ return Combined;
+
+ if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
+ return Combined;
+
return SDValue();
}
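The new saddo_carry fold rests on the two's-complement identity ~a = -a - 1, so (~a) + b + c = b - a - (1 - c), i.e. a subtract with the carry flipped. A standalone C++ check of the result value (my sketch; it does not model the overflow flag the node also produces):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b)
      for (unsigned c = 0; c <= 1; ++c) {
        uint8_t lhs = static_cast<uint8_t>(~a) + b + c;      // (xor a, -1) + b + carry
        uint8_t rhs = static_cast<uint8_t>(b - a - (1 - c)); // b - a - !carry
        assert(lhs == rhs);
      }
  return 0;
}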
@@ -4141,7 +4199,7 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
return N0;
// If it cannot overflow, transform into an sub.
- if (DAG.computeOverflowForSub(IsSigned, N0, N1) == SelectionDAG::OFK_Never)
+ if (DAG.willNotOverflowSub(IsSigned, N0, N1))
return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
return SDValue();
@@ -4207,7 +4265,7 @@ SDValue DAGCombiner::visitSUBO(SDNode *N) {
return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
// If it cannot overflow, transform into an sub.
- if (DAG.computeOverflowForSub(IsSigned, N0, N1) == SelectionDAG::OFK_Never)
+ if (DAG.willNotOverflowSub(IsSigned, N0, N1))
return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
DAG.getConstant(0, DL, CarryVT));
@@ -4342,12 +4400,12 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold (mul x, (1 << c)) -> x << c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
- DAG.isKnownToBeAPowerOfTwo(N1) &&
(!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
- SDValue LogBase2 = BuildLogBase2(N1, DL);
- EVT ShiftVT = getShiftAmountTy(N0.getValueType());
- SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
- return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
+ if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
+ return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
+ }
}
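For reference, the shift form this combine produces, checked standalone (my sketch; std::countr_zero from C++20 <bit> plays the role BuildLogBase2 plays for a constant power of two):

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 12345, pow2 = 64;
  unsigned log2 = std::countr_zero(pow2); // log2 of a power of two
  assert(x * pow2 == x << log2);          // mul by 2^c is a left shift
  return 0;
}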
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
@@ -4869,31 +4927,31 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N->getValueType(0);
// fold (udiv x, (1 << c)) -> x >>u c
- if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
- DAG.isKnownToBeAPowerOfTwo(N1)) {
- SDValue LogBase2 = BuildLogBase2(N1, DL);
- AddToWorklist(LogBase2.getNode());
+ if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
+ if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
+ AddToWorklist(LogBase2.getNode());
- EVT ShiftVT = getShiftAmountTy(N0.getValueType());
- SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
- AddToWorklist(Trunc.getNode());
- return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
+ AddToWorklist(Trunc.getNode());
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
+ }
}
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (N1.getOpcode() == ISD::SHL) {
SDValue N10 = N1.getOperand(0);
- if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
- DAG.isKnownToBeAPowerOfTwo(N10)) {
- SDValue LogBase2 = BuildLogBase2(N10, DL);
- AddToWorklist(LogBase2.getNode());
+ if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
+ if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
+ AddToWorklist(LogBase2.getNode());
- EVT ADDVT = N1.getOperand(1).getValueType();
- SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
- AddToWorklist(Trunc.getNode());
- SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
- AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
+ EVT ADDVT = N1.getOperand(1).getValueType();
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
+ AddToWorklist(Trunc.getNode());
+ SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
+ }
}
}
@@ -5111,14 +5169,15 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
// fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
- DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
- unsigned NumEltBits = VT.getScalarSizeInBits();
- SDValue LogBase2 = BuildLogBase2(N1, DL);
- SDValue SRLAmt = DAG.getNode(
- ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
- EVT ShiftVT = getShiftAmountTy(N0.getValueType());
- SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
- return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
+ hasOperation(ISD::SRL, VT)) {
+ if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
+ unsigned NumEltBits = VT.getScalarSizeInBits();
+ SDValue SRLAmt = DAG.getNode(
+ ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+ SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
+ }
}
// If the type twice as wide is legal, transform the mulhu to a wider multiply
@@ -5292,6 +5351,10 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
+ // Constant fold.
+ if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
+ return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
+
// canonicalize constant to RHS (vector doesn't have to splat)
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
@@ -5330,6 +5393,10 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
+ // Constant fold.
+ if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
+ return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
+
// canonicalize constant to RHS (vector doesn't have to splat)
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
@@ -5412,34 +5479,18 @@ SDValue DAGCombiner::visitMULO(SDNode *N) {
return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
N->getVTList(), N0, N0);
- if (IsSigned) {
- // A 1 bit SMULO overflows if both inputs are 1.
- if (VT.getScalarSizeInBits() == 1) {
- SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
- return CombineTo(N, And,
- DAG.getSetCC(DL, CarryVT, And,
- DAG.getConstant(0, DL, VT), ISD::SETNE));
- }
-
- // Multiplying n * m significant bits yields a result of n + m significant
- // bits. If the total number of significant bits does not exceed the
- // result bit width (minus 1), there is no overflow.
- unsigned SignBits = DAG.ComputeNumSignBits(N0);
- if (SignBits > 1)
- SignBits += DAG.ComputeNumSignBits(N1);
- if (SignBits > VT.getScalarSizeInBits() + 1)
- return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
- DAG.getConstant(0, DL, CarryVT));
- } else {
- KnownBits N1Known = DAG.computeKnownBits(N1);
- KnownBits N0Known = DAG.computeKnownBits(N0);
- bool Overflow;
- (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
- if (!Overflow)
- return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
- DAG.getConstant(0, DL, CarryVT));
+ // A 1 bit SMULO overflows if both inputs are 1.
+ if (IsSigned && VT.getScalarSizeInBits() == 1) {
+ SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
+ SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
+ DAG.getConstant(0, DL, VT), ISD::SETNE);
+ return CombineTo(N, And, Cmp);
}
+ // If it cannot overflow, transform into a mul.
+ if (DAG.willNotOverflowMul(IsSigned, N0, N1))
+ return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
+ DAG.getConstant(0, DL, CarryVT));
return SDValue();
}
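The willNotOverflowMul reasoning that the removed block spelled out: n and m significant bits multiply into at most n + m significant bits. A concrete standalone check (my sketch; __builtin_mul_overflow is a GCC/Clang builtin):

#include <cassert>
#include <cstdint>

int main() {
  // i32 operands sign-extended from i16 have at least 17 sign bits each;
  // 17 + 17 > 32 + 1, so their i32 product can never overflow and smulo
  // may be rewritten as a plain mul with a constant-false overflow bit.
  int16_t a = -32768, b = 32767;
  int32_t product;
  assert(!__builtin_mul_overflow((int32_t)a, (int32_t)b, &product));
  assert(product == -1073709056);
  return 0;
}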
@@ -5459,12 +5510,12 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
return 0;
// The constants need to be the same or a truncated version of each other.
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
- ConstantSDNode *N3C = isConstOrConstSplat(N3);
+ ConstantSDNode *N1C = isConstOrConstSplat(peekThroughTruncates(N1));
+ ConstantSDNode *N3C = isConstOrConstSplat(peekThroughTruncates(N3));
if (!N1C || !N3C)
return 0;
- const APInt &C1 = N1C->getAPIntValue();
- const APInt &C2 = N3C->getAPIntValue();
+ const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
+ const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
return 0;
return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
@@ -5579,7 +5630,7 @@ static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
SelectionDAG &DAG) {
// We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
// select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
- // be truncated versions of the the setcc (N0/N1).
+ // be truncated versions of the setcc (N0/N1).
if ((N0 != N2 &&
(N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
@@ -6013,6 +6064,72 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
return SDValue();
}
+static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
+ SelectionDAG &DAG) {
+ return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
+}
+
+static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
+ SelectionDAG &DAG) {
+ return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
+}
+
+static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
+ ISD::CondCode CC, unsigned OrAndOpcode,
+ SelectionDAG &DAG,
+ bool isFMAXNUMFMINNUM_IEEE,
+ bool isFMAXNUMFMINNUM) {
+ // The optimization cannot be applied for all the predicates because
+ // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
+ // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
+ // applied at all if one of the operands is a signaling NaN.
+
+ // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
+ // are non NaN values.
+ if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
+ ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
+ return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
+ isFMAXNUMFMINNUM_IEEE
+ ? ISD::FMINNUM_IEEE
+ : ISD::DELETED_NODE;
+ else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
+ (OrAndOpcode == ISD::OR)) ||
+ ((CC == ISD::SETLT || CC == ISD::SETLE) &&
+ (OrAndOpcode == ISD::AND)))
+ return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
+ isFMAXNUMFMINNUM_IEEE
+ ? ISD::FMAXNUM_IEEE
+ : ISD::DELETED_NODE;
+ // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
+ // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
+ // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
+ // that there are not any sNaNs, then the optimization is not valid
+ // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
+ // the optimization using FMINNUM/FMAXNUM for the following cases. If
+ // we can prove that we do not have any sNaNs, then we can do the
+ // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
+ // cases.
+ else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
+ (OrAndOpcode == ISD::OR)) ||
+ ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
+ (OrAndOpcode == ISD::AND)))
+ return isFMAXNUMFMINNUM ? ISD::FMINNUM
+ : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
+ isFMAXNUMFMINNUM_IEEE
+ ? ISD::FMINNUM_IEEE
+ : ISD::DELETED_NODE;
+ else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
+ (OrAndOpcode == ISD::OR)) ||
+ ((CC == ISD::SETULT || CC == ISD::SETULE) &&
+ (OrAndOpcode == ISD::AND)))
+ return isFMAXNUMFMINNUM ? ISD::FMAXNUM
+ : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
+ isFMAXNUMFMINNUM_IEEE
+ ? ISD::FMAXNUM_IEEE
+ : ISD::DELETED_NODE;
+ return ISD::DELETED_NODE;
+}
+
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
assert(
@@ -6022,7 +6139,8 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
// TODO: Search past casts/truncates.
SDValue LHS = LogicOp->getOperand(0);
SDValue RHS = LogicOp->getOperand(1);
- if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC)
+ if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
+ !LHS->hasOneUse() || !RHS->hasOneUse())
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -6050,59 +6168,77 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
// (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
// and and-cmp-cmp will be replaced with max-cmp sequence:
// (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
- if (OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
- TLI.isOperationLegal(ISD::SMAX, OpVT) &&
- TLI.isOperationLegal(ISD::UMIN, OpVT) &&
- TLI.isOperationLegal(ISD::SMIN, OpVT)) {
- if (LHS->getOpcode() == ISD::SETCC && RHS->getOpcode() == ISD::SETCC &&
- LHS->hasOneUse() && RHS->hasOneUse() &&
- // The two comparisons should have either the same predicate or the
- // predicate of one of the comparisons is the opposite of the other one.
- (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR)) &&
- // The optimization does not work for `==` or `!=` .
- !ISD::isIntEqualitySetCC(CCL) && !ISD::isIntEqualitySetCC(CCR)) {
- SDValue CommonValue, Operand1, Operand2;
- ISD::CondCode CC = ISD::SETCC_INVALID;
- if (CCL == CCR) {
- if (LHS0 == RHS0) {
- CommonValue = LHS0;
- Operand1 = LHS1;
- Operand2 = RHS1;
- CC = ISD::getSetCCSwappedOperands(CCL);
- } else if (LHS1 == RHS1) {
- CommonValue = LHS1;
- Operand1 = LHS0;
- Operand2 = RHS0;
- CC = CCL;
- }
- } else {
- assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
- if (LHS0 == RHS1) {
- CommonValue = LHS0;
- Operand1 = LHS1;
- Operand2 = RHS0;
- CC = ISD::getSetCCSwappedOperands(CCL);
- } else if (RHS0 == LHS1) {
- CommonValue = LHS1;
- Operand1 = LHS0;
- Operand2 = RHS1;
- CC = CCL;
- }
+  // The optimization does not work for `==` or `!=`.
+  // The two comparisons should either have the same predicate, or the
+  // predicate of one comparison should be the opposite of the other.
+ bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
+ TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
+ bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
+ TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
+ if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
+ TLI.isOperationLegal(ISD::SMAX, OpVT) &&
+ TLI.isOperationLegal(ISD::UMIN, OpVT) &&
+ TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
+ (OpVT.isFloatingPoint() &&
+ (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
+ !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
+ CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
+ CCL != ISD::SETTRUE &&
+ (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
+
+ SDValue CommonValue, Operand1, Operand2;
+ ISD::CondCode CC = ISD::SETCC_INVALID;
+ if (CCL == CCR) {
+ if (LHS0 == RHS0) {
+ CommonValue = LHS0;
+ Operand1 = LHS1;
+ Operand2 = RHS1;
+ CC = ISD::getSetCCSwappedOperands(CCL);
+ } else if (LHS1 == RHS1) {
+ CommonValue = LHS1;
+ Operand1 = LHS0;
+ Operand2 = RHS0;
+ CC = CCL;
}
+ } else {
+ assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
+ if (LHS0 == RHS1) {
+ CommonValue = LHS0;
+ Operand1 = LHS1;
+ Operand2 = RHS0;
+ CC = CCR;
+ } else if (RHS0 == LHS1) {
+ CommonValue = LHS1;
+ Operand1 = LHS0;
+ Operand2 = RHS1;
+ CC = CCL;
+ }
+ }
- if (CC != ISD::SETCC_INVALID) {
- unsigned NewOpcode;
- bool IsSigned = isSignedIntSetCC(CC);
- if (((CC == ISD::SETLE || CC == ISD::SETULE || CC == ISD::SETLT ||
- CC == ISD::SETULT) &&
- (LogicOp->getOpcode() == ISD::OR)) ||
- ((CC == ISD::SETGE || CC == ISD::SETUGE || CC == ISD::SETGT ||
- CC == ISD::SETUGT) &&
- (LogicOp->getOpcode() == ISD::AND)))
+ // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
+ // handle it using OR/AND.
+ if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
+ CC = ISD::SETCC_INVALID;
+ else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
+ CC = ISD::SETCC_INVALID;
+
+ if (CC != ISD::SETCC_INVALID) {
+ unsigned NewOpcode = ISD::DELETED_NODE;
+ bool IsSigned = isSignedIntSetCC(CC);
+ if (OpVT.isInteger()) {
+ bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
+ CC == ISD::SETLT || CC == ISD::SETULT);
+ bool IsOr = (LogicOp->getOpcode() == ISD::OR);
+ if (IsLess == IsOr)
NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
else
NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
+ } else if (OpVT.isFloatingPoint())
+ NewOpcode =
+ getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
+ DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
+ if (NewOpcode != ISD::DELETED_NODE) {
SDValue MinMaxValue =
DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
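A standalone C++ illustration of the floating-point side (my sketch, not from the patch), using std::fmin/std::fmax as the FMINNUM/FMAXNUM analogues: for the ordered-compare cases, both sides agree even when one operand is a quiet NaN, because an ordered compare on NaN is false and fmin/fmax return their non-NaN operand:

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  double qnan = std::numeric_limits<double>::quiet_NaN();
  double a = qnan, c = 2.0, b = 3.0;
  assert(((a < b) || (c < b)) == (std::fmin(a, c) < b)); // both true here
  assert(((b < a) || (b < c)) == (b < std::fmax(a, c))); // both false here
  return 0;
}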
@@ -6115,8 +6251,7 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
if (CCL == CCR &&
CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
- LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger() && LHS.hasOneUse() &&
- RHS.hasOneUse()) {
+ LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
const APInt &APLhs = LHS1C->getAPIntValue();
const APInt &APRhs = RHS1C->getAPIntValue();
@@ -6179,6 +6314,33 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
return SDValue();
}
+// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
+// We canonicalize to the `select` form in the middle end, but the `and` form
+// gets better codegen and all tested targets (arm, x86, riscv) prefer it.
+static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
+ const SDLoc &DL, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!isNullConstant(F))
+ return SDValue();
+
+ EVT CondVT = Cond.getValueType();
+ if (TLI.getBooleanContents(CondVT) !=
+ TargetLoweringBase::ZeroOrOneBooleanContent)
+ return SDValue();
+
+ if (T.getOpcode() != ISD::AND)
+ return SDValue();
+
+ if (!isOneConstant(T.getOperand(1)))
+ return SDValue();
+
+ EVT OpVT = T.getValueType();
+
+ SDValue CondMask =
+ OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
+ return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
+}
+
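A quick standalone check of the select-to-and identity (my sketch): zext(c) is 0 or 1, and 1 & X equals X & 1, so the branchy form and the masked form always match:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 256; ++x)
    for (bool c : {false, true}) {
      uint32_t sel  = c ? (x & 1) : 0;       // (select c, (X & 1), 0)
      uint32_t fold = (uint32_t)c & x;       // (and (zext c), X)
      assert(sel == fold);
    }
  return 0;
}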
/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value. This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
@@ -6464,7 +6626,7 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
SmallPtrSet<SDNode*, 2> NodesWithConsts;
SDNode *FixupNode = nullptr;
if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
- if (Loads.size() == 0)
+ if (Loads.empty())
return false;
LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
@@ -6488,12 +6650,17 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
SDValue Op1 = LogicN->getOperand(1);
if (isa<ConstantSDNode>(Op0))
- std::swap(Op0, Op1);
+ Op0 =
+ DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
+
+ if (isa<ConstantSDNode>(Op1))
+ Op1 =
+ DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
- SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
- Op1, MaskOp);
+ if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
+ std::swap(Op0, Op1);
- DAG.UpdateNodeOperands(LogicN, Op0, And);
+ DAG.UpdateNodeOperands(LogicN, Op0, Op1);
}
// Create narrow loads.
@@ -6924,12 +7091,23 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
Constant = C->getAPIntValue();
} else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
+ unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
- bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
- SplatBitSize, HasAnyUndefs);
- if (IsSplat) {
+ // Endianness should not matter here. Code below makes sure that we only
+ // use the result if the SplatBitSize is a multiple of the vector element
+ // size. And after that we AND all element sized parts of the splat
+ // together. So the end result should be the same regardless of in which
+ // order we do those operations.
+ const bool IsBigEndian = false;
+ bool IsSplat =
+ Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+ HasAnyUndefs, EltBitWidth, IsBigEndian);
+
+ // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
+ // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
+ if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
// Undef bits can contribute to a possible optimisation if set, so
// set them.
SplatValue |= SplatUndef;
@@ -6938,23 +7116,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// the first vector value and FF for the rest, repeating. We need a mask
// that will apply equally to all members of the vector, so AND all the
// lanes of the constant together.
- unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
-
- // If the splat value has been compressed to a bitlength lower
- // than the size of the vector lane, we need to re-expand it to
- // the lane size.
- if (EltBitWidth > SplatBitSize)
- for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
- SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
- SplatValue |= SplatValue.shl(SplatBitSize);
-
- // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
- // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
- if ((SplatBitSize % EltBitWidth) == 0) {
- Constant = APInt::getAllOnes(EltBitWidth);
- for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
- Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
- }
+ Constant = APInt::getAllOnes(EltBitWidth);
+ for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
+ Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
}
}
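The splat handling above in miniature (my standalone sketch): when the splat pattern is wider than the element type, ANDing its element-sized pieces yields a mask that is safe for every lane regardless of endianness:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t splat = 0xFFFF000000FF00FFull;    // SplatBitSize = 64, elements i32
  uint32_t piece0 = (uint32_t)splat;         // low element-sized piece
  uint32_t piece1 = (uint32_t)(splat >> 32); // high element-sized piece
  uint32_t mask = piece0 & piece1;           // the 'Constant' the combine uses
  assert(mask == 0x00FF0000u);
  return 0;
}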
@@ -7467,12 +7631,12 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
getShiftAmountTy(VT)))
- return BSwap;
+ return BSwap;
// Try again with commuted operands.
if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
getShiftAmountTy(VT)))
- return BSwap;
+ return BSwap;
// Look for either
@@ -8493,7 +8657,7 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
/// *ExtractVectorElement
using SDByteProvider = ByteProvider<SDNode *>;
-static const std::optional<SDByteProvider>
+static std::optional<SDByteProvider>
calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
std::optional<uint64_t> VectorIndex,
unsigned StartingIndex = 0) {
@@ -8701,7 +8865,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
// TODO: If there is evidence that running this later would help, this
// limitation could be removed. Legality checks may need to be added
// for the created store and optional bswap/rotate.
- if (LegalOperations || OptLevel == CodeGenOpt::None)
+ if (LegalOperations || OptLevel == CodeGenOptLevel::None)
return SDValue();
// We only handle merging simple stores of 1-4 bytes.
@@ -9710,9 +9874,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
}
- if (SimplifyDemandedBits(SDValue(N, 0)))
- return SDValue(N, 0);
-
// fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
if (N0.getOpcode() == ISD::SHL) {
auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
@@ -9886,15 +10047,35 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// Variant of version done on multiply, except mul by a power of 2 is turned
// into a shift.
if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
- N0->hasOneUse() &&
- isConstantOrConstantVector(N1, /* No Opaques */ true) &&
- isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
+ N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
+ SDValue N01 = N0.getOperand(1);
+ if (SDValue Shl1 =
+ DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
+ SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
+ AddToWorklist(Shl0.getNode());
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
+ }
+ }
+
+ // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
+ // TODO: Add zext/add_nuw variant with suitable test coverage
+ // TODO: Should we limit this with isLegalAddImmediate?
+ if (N0.getOpcode() == ISD::SIGN_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::ADD &&
+ N0.getOperand(0)->getFlags().hasNoSignedWrap() && N0->hasOneUse() &&
+ N0.getOperand(0)->hasOneUse() &&
TLI.isDesirableToCommuteWithShift(N, Level)) {
- SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
- SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
- AddToWorklist(Shl0.getNode());
- AddToWorklist(Shl1.getNode());
- return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
+ SDValue Add = N0.getOperand(0);
+ SDLoc DL(N0);
+ if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
+ {Add.getOperand(1)})) {
+ if (SDValue ShlC =
+ DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
+ SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
+ SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
+ return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
+ }
+ }
}
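The new sext(add_nsw) fold relies on sign extension distributing over a no-signed-wrap add. A standalone C++ check (my sketch; the nsw condition is what makes the narrow and wide adds agree):

#include <cassert>
#include <cstdint>

int main() {
  int8_t x = 5, c1 = 100;                    // 5 + 100 fits in i8: add is nsw
  int c2 = 3;
  int32_t lhs = (int32_t)(int8_t)(x + c1) << c2;          // shl(sext(add))
  int32_t rhs = ((int32_t)x << c2) + ((int32_t)c1 << c2); // add(shl(sext x), c1 << c2)
  assert(lhs == rhs && lhs == 840);
  return 0;
}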
// fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
@@ -9910,6 +10091,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (SDValue NewSHL = visitShiftByConstant(N))
return NewSHL;
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
// Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
if (N0.getOpcode() == ISD::VSCALE && N1C) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
@@ -10110,25 +10294,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
- // sext_inreg.
ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
- unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
- EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
- if (VT.isVector())
- ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
- VT.getVectorElementCount());
- if (!LegalOperations ||
- TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
- TargetLowering::Legal)
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
- N0.getOperand(0), DAG.getValueType(ExtVT));
- // Even if we can't convert to sext_inreg, we might be able to remove
- // this shift pair if the input is already sign extended.
- if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
- return N0.getOperand(0);
- }
// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
// clamp (add c1, c2) to max shift.
@@ -10169,7 +10335,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// If truncate is free for the target, sext(shl) is likely to result in better
// code.
if (N0.getOpcode() == ISD::SHL && N1C) {
- // Get the two constanst of the shifts, CN0 = m, CN = n.
+ // Get the two constants of the shifts, CN0 = m, CN = n.
const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
if (N01C) {
LLVMContext &Ctx = *DAG.getContext();
@@ -10640,7 +10806,7 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
RHS->getMemOperand()->getFlags(), &Fast) &&
Fast) {
SDValue NewPtr = DAG.getMemBasePlusOffset(
- RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
+ RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
AddToWorklist(NewPtr.getNode());
SDValue Load = DAG.getLoad(
VT, DL, RHS->getChain(), NewPtr,
@@ -10739,9 +10905,12 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N) {
Op1 = AbsOp1.getOperand(1);
unsigned Opc0 = Op0.getOpcode();
+
// Check if the operands of the sub are (zero|sign)-extended.
+ // TODO: Should we use ValueTracking instead?
if (Opc0 != Op1.getOpcode() ||
- (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) {
+ (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
+ Opc0 != ISD::SIGN_EXTEND_INREG)) {
// fold (abs (sub nsw x, y)) -> abds(x, y)
if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
TLI.preferABDSToABSWithNSW(VT)) {
@@ -10751,17 +10920,24 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N) {
return SDValue();
}
- EVT VT1 = Op0.getOperand(0).getValueType();
- EVT VT2 = Op1.getOperand(0).getValueType();
- unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
+ EVT VT0, VT1;
+ if (Opc0 == ISD::SIGN_EXTEND_INREG) {
+ VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
+ VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
+ } else {
+ VT0 = Op0.getOperand(0).getValueType();
+ VT1 = Op1.getOperand(0).getValueType();
+ }
+ unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
// fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
// fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
- // NOTE: Extensions must be equivalent.
- if (VT1 == VT2 && hasOperation(ABDOpcode, VT1)) {
- Op0 = Op0.getOperand(0);
- Op1 = Op1.getOperand(0);
- SDValue ABD = DAG.getNode(ABDOpcode, DL, VT1, Op0, Op1);
+ EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
+ if ((VT0 == MaxVT || Op0->hasOneUse()) &&
+ (VT1 == MaxVT || Op1->hasOneUse()) && hasOperation(ABDOpcode, MaxVT)) {
+ SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
+ DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
+ DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
}
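A standalone check of the widened absolute-difference identity (my sketch, with i8 sources and the max-minus-min form standing in for ISD::ABDS):

#include <cassert>
#include <cstdint>
#include <cstdlib>

int main() {
  for (int x = -128; x <= 127; ++x)
    for (int y = -128; y <= 127; ++y) {
      int32_t ref = std::abs(x - y);                              // abs(sext - sext)
      uint8_t abd = (uint8_t)((x > y ? x : y) - (x < y ? x : y)); // abds(x, y)
      assert(ref == (int32_t)abd);                                // == zext(abds)
    }
  return 0;
}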
@@ -11487,6 +11663,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (SDValue BinOp = foldSelectOfBinops(N))
return BinOp;
+ if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
+ return R;
+
return SDValue();
}
@@ -11547,8 +11726,6 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
SelectionDAG &DAG, const SDLoc &DL) {
- if (Index.getOpcode() != ISD::ADD)
- return false;
// Only perform the transformation when existing operands can be reused.
if (IndexIsScaled)
@@ -11558,21 +11735,27 @@ bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
return false;
EVT VT = BasePtr.getValueType();
+
+ if (SDValue SplatVal = DAG.getSplatValue(Index);
+ SplatVal && !isNullConstant(SplatVal) &&
+ SplatVal.getValueType() == VT) {
+ BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
+ Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
+ return true;
+ }
+
+ if (Index.getOpcode() != ISD::ADD)
+ return false;
+
if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
SplatVal && SplatVal.getValueType() == VT) {
- if (isNullConstant(BasePtr))
- BasePtr = SplatVal;
- else
- BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
+ BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
Index = Index.getOperand(1);
return true;
}
if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
SplatVal && SplatVal.getValueType() == VT) {
- if (isNullConstant(BasePtr))
- BasePtr = SplatVal;
- else
- BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
+ BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
Index = Index.getOperand(0);
return true;
}
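The rebasing above only moves a uniform offset between the vector index and the scalar base; modular address arithmetic makes the two forms interchangeable. A trivial standalone check (my sketch):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t base = 0x1000, splat = 64;
  uint64_t idx[4] = {0, 8, 16, 24};   // the non-uniform part of the index
  for (int i = 0; i < 4; ++i)         // base + (splat + idx[i])
    assert(base + (splat + idx[i]) == (base + splat) + idx[i]);
  return 0;
}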
@@ -11586,10 +11769,9 @@ bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
// It's always safe to look through zero extends.
if (Index.getOpcode() == ISD::ZERO_EXTEND) {
- SDValue Op = Index.getOperand(0);
- if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) {
+ if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
IndexType = ISD::UNSIGNED_SCALED;
- Index = Op;
+ Index = Index.getOperand(0);
return true;
}
if (ISD::isIndexTypeSigned(IndexType)) {
@@ -11600,12 +11782,10 @@ bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
// It's only safe to look through sign extends when Index is signed.
if (Index.getOpcode() == ISD::SIGN_EXTEND &&
- ISD::isIndexTypeSigned(IndexType)) {
- SDValue Op = Index.getOperand(0);
- if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) {
- Index = Op;
- return true;
- }
+ ISD::isIndexTypeSigned(IndexType) &&
+ TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
+ Index = Index.getOperand(0);
+ return true;
}
return false;
@@ -11756,6 +11936,21 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
+ auto *SST = cast<VPStridedStoreSDNode>(N);
+ EVT EltVT = SST->getValue().getValueType().getVectorElementType();
+ // Combine strided stores with unit-stride to a regular VP store.
+ if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
+ CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
+ return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
+ SST->getBasePtr(), SST->getOffset(), SST->getMask(),
+ SST->getVectorLength(), SST->getMemoryVT(),
+ SST->getMemOperand(), SST->getAddressingMode(),
+ SST->isTruncatingStore(), SST->isCompressingStore());
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
SDValue Mask = MGT->getMask();
@@ -11843,6 +12038,22 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
+ auto *SLD = cast<VPStridedLoadSDNode>(N);
+ EVT EltVT = SLD->getValueType(0).getVectorElementType();
+ // Combine strided loads with unit-stride to a regular VP load.
+ if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
+ CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
+ SDValue NewLd = DAG.getLoadVP(
+ SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
+ SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
+ SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
+ SLD->getMemOperand(), SLD->isExpandingLoad());
+ return CombineTo(N, NewLd, NewLd.getValue(1));
+ }
+ return SDValue();
+}
+
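Both strided combines hinge on the same observation: a stride equal to the element store size visits consecutive elements, so the access is already contiguous. A standalone byte-address check (my sketch):

#include <cassert>
#include <cstdint>

int main() {
  int32_t data[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  const uint8_t *base = reinterpret_cast<const uint8_t *>(data);
  const uint64_t stride = sizeof(int32_t);  // unit stride for i32 elements
  for (int i = 0; i < 8; ++i)               // base + i*stride == &data[i]
    assert(*reinterpret_cast<const int32_t *>(base + i * stride) == data[i]);
  return 0;
}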
/// A vector select of 2 constant vectors can be simplified to math/logic to
/// avoid a variable select instruction and possibly avoid constant loads.
SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
@@ -12255,27 +12466,132 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
- SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
- SDLoc(N), !PreferSetCC);
+ SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, SDLoc(N), !PreferSetCC);
- if (!Combined)
- return SDValue();
+ if (Combined) {
+ // If we prefer to have a setcc, and we don't, we'll try our best to
+ // recreate one using rebuildSetCC.
+ if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
+ SDValue NewSetCC = rebuildSetCC(Combined);
- // If we prefer to have a setcc, and we don't, we'll try our best to
- // recreate one using rebuildSetCC.
- if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
- SDValue NewSetCC = rebuildSetCC(Combined);
-
- // We don't have anything interesting to combine to.
- if (NewSetCC.getNode() == N)
- return SDValue();
+ // We don't have anything interesting to combine to.
+ if (NewSetCC.getNode() == N)
+ return SDValue();
- if (NewSetCC)
- return NewSetCC;
+ if (NewSetCC)
+ return NewSetCC;
+ }
+ return Combined;
}
- return Combined;
+ // Optimize
+ // 1) (icmp eq/ne (and X, C0), (shift X, C1))
+ // or
+ // 2) (icmp eq/ne X, (rotate X, C1))
+ // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
+  // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
+  // Then:
+  // If C1 is a power of 2, then the rotate and shift+and versions are
+  // equivalent, so we can interchange them depending on target preference.
+  // Otherwise, if we have the shift+and version we can interchange srl/shl,
+  // which in turn affects the constant C0. We can use this to get better
+  // constants again determined by target preference.
+ if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
+ auto IsAndWithShift = [](SDValue A, SDValue B) {
+ return A.getOpcode() == ISD::AND &&
+ (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
+ A.getOperand(0) == B.getOperand(0);
+ };
+ auto IsRotateWithOp = [](SDValue A, SDValue B) {
+ return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
+ B.getOperand(0) == A;
+ };
+ SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
+ bool IsRotate = false;
+
+ // Find either shift+and or rotate pattern.
+ if (IsAndWithShift(N0, N1)) {
+ AndOrOp = N0;
+ ShiftOrRotate = N1;
+ } else if (IsAndWithShift(N1, N0)) {
+ AndOrOp = N1;
+ ShiftOrRotate = N0;
+ } else if (IsRotateWithOp(N0, N1)) {
+ IsRotate = true;
+ AndOrOp = N0;
+ ShiftOrRotate = N1;
+ } else if (IsRotateWithOp(N1, N0)) {
+ IsRotate = true;
+ AndOrOp = N1;
+ ShiftOrRotate = N0;
+ }
+
+ if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
+ (IsRotate || AndOrOp.hasOneUse())) {
+ EVT OpVT = N0.getValueType();
+      // Get the constant shift/rotate amount and possibly the mask (for the
+      // shift+and variant).
+ auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
+ ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
+ /*AllowTrunc*/ false);
+ if (CNode == nullptr)
+ return std::nullopt;
+ return CNode->getAPIntValue();
+ };
+ std::optional<APInt> AndCMask =
+ IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
+ std::optional<APInt> ShiftCAmt =
+ GetAPIntValue(ShiftOrRotate.getOperand(1));
+ unsigned NumBits = OpVT.getScalarSizeInBits();
+
+ // We found constants.
+ if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
+ unsigned ShiftOpc = ShiftOrRotate.getOpcode();
+ // Check that the constants meet the constraints.
+ bool CanTransform = IsRotate;
+ if (!CanTransform) {
+          // Check that the mask and shift complement each other
+ CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
+ // Check that we are comparing all bits
+ CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
+ // Check that the and mask is correct for the shift
+ CanTransform &=
+ ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
+ }
+
+ // See if target prefers another shift/rotate opcode.
+ unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
+ OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
+ // Transform is valid and we have a new preference.
+ if (CanTransform && NewShiftOpc != ShiftOpc) {
+ SDLoc DL(N);
+ SDValue NewShiftOrRotate =
+ DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
+ ShiftOrRotate.getOperand(1));
+ SDValue NewAndOrOp = SDValue();
+
+ if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
+ APInt NewMask =
+ NewShiftOpc == ISD::SHL
+ ? APInt::getHighBitsSet(NumBits,
+ NumBits - ShiftCAmt->getZExtValue())
+ : APInt::getLowBitsSet(NumBits,
+ NumBits - ShiftCAmt->getZExtValue());
+ NewAndOrOp =
+ DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
+ DAG.getConstant(NewMask, DL, OpVT));
+ } else {
+ NewAndOrOp = ShiftOrRotate.getOperand(0);
+ }
+
+ return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
+ }
+ }
+ }
+ }
+ return SDValue();
}
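A standalone check of the shift+and versus rotate equivalence the comment describes (my sketch; 32 is a power of two, so both forms test that the two halves of x are equal):

#include <cassert>
#include <cstdint>

static uint64_t rotr64(uint64_t x, unsigned n) { // n must be in (0, 64)
  return (x >> n) | (x << (64 - n));
}

int main() {
  for (uint64_t x : {0x0123456701234567ull, 0xDEADBEEF00000000ull, 42ull}) {
    bool viaAndShift = (x & 0xFFFFFFFFull) == (x >> 32); // shift+and form
    bool viaRotate   = (x == rotr64(x, 32));             // rotate form
    assert(viaAndShift == viaRotate);
  }
  return 0;
}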
SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
@@ -12510,7 +12826,7 @@ static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
if (BothLiveOut)
// Both unextended and extended values are live out. There had better be
// a good reason for the transformation.
- return ExtendNodes.size();
+ return !ExtendNodes.empty();
}
return true;
}
@@ -12612,7 +12928,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
- BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
+ BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
Loads.push_back(SplitLoad.getValue(0));
Chains.push_back(SplitLoad.getValue(1));
@@ -12832,11 +13148,10 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
-static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
- const TargetLowering &TLI, EVT VT,
- SDNode *N, SDValue N0,
- ISD::LoadExtType ExtLoadType,
- ISD::NodeType ExtOpc) {
+static SDValue
+tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
+ bool LegalOperations, SDNode *N, SDValue N0,
+ ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
if (!N0.hasOneUse())
return SDValue();
@@ -12844,7 +13159,8 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
return SDValue();
- if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
+ if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
+ !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
return SDValue();
if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
@@ -13117,8 +13433,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
return foldedExt;
if (SDValue foldedExt =
- tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
- ISD::SIGN_EXTEND))
+ tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
+ ISD::SEXTLOAD, ISD::SIGN_EXTEND))
return foldedExt;
// fold (sext (load x)) to multiple smaller sextloads.
@@ -13181,9 +13497,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
return V;
// fold (sext x) -> (zext x) if the sign bit is known zero.
- if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
- DAG.SignBitIsZero(N0))
- return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
+ if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
+ DAG.SignBitIsZero(N0)) {
+ SDNodeFlags Flags;
+ Flags.setNonNeg(true);
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, Flags);
+ }
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
@@ -13327,8 +13647,12 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
N0.getScalarValueSizeInBits(),
std::min(Op.getScalarValueSizeInBits(),
VT.getScalarSizeInBits()));
- if (TruncatedBits.isSubsetOf(Known.Zero))
- return DAG.getZExtOrTrunc(Op, DL, VT);
+ if (TruncatedBits.isSubsetOf(Known.Zero)) {
+ SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
+ DAG.salvageDebugInfo(*N0.getNode());
+
+ return ZExtOrTrunc;
+ }
}
// fold (zext (truncate x)) -> (and x, mask)
@@ -13396,8 +13720,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
return foldedExt;
if (SDValue foldedExt =
- tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
- ISD::ZERO_EXTEND))
+ tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
+ ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
return foldedExt;
// fold (zext (load x)) to multiple smaller zextloads.
@@ -13408,8 +13732,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (and/or/xor (load x), cst)) ->
// (and/or/xor (zextload x), (zext cst))
// Unless (and (load x) cst) will match as a zextload already and has
- // additional users.
- if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
+ // additional users, or the zext is already free.
+ if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
@@ -14005,8 +14329,8 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
// The original load itself didn't wrap, so an offset within it doesn't.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
- SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
- TypeSize::Fixed(PtrOff), DL, Flags);
+ SDValue NewPtr = DAG.getMemBasePlusOffset(
+ LN0->getBasePtr(), TypeSize::getFixed(PtrOff), DL, Flags);
AddToWorklist(NewPtr.getNode());
SDValue Load;
@@ -14316,9 +14640,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
EVT SrcVT = N0.getValueType();
bool isLE = DAG.getDataLayout().isLittleEndian();
- // noop truncate
- if (SrcVT == VT)
- return N0;
+ // trunc(undef) = undef
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
// fold (truncate (truncate x)) -> (truncate x)
if (N0.getOpcode() == ISD::TRUNCATE)
@@ -14350,7 +14674,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue X = N0.getOperand(0);
SDValue ExtVal = N0.getOperand(1);
EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
- if (ExtVT.bitsLT(VT)) {
+ if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
SDValue TrX = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, TrX, ExtVal);
}
@@ -14448,6 +14772,16 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return DAG.getBuildVector(VT, DL, TruncOps);
}
+ // trunc (splat_vector x) -> splat_vector (trunc x)
+ if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
+ (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
+ SDLoc DL(N);
+ EVT SVT = VT.getScalarType();
+ return DAG.getSplatVector(
+ VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
+ }
+
// Fold a series of buildvector, bitcast, and truncate if possible.
// For example fold
// (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
@@ -15301,7 +15635,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
// Replacing the inner FMul could cause the outer FMA to be simplified
// away.
- return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue() : FMA;
+ return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
}
TmpFMA = TmpFMA->getOperand(2);
@@ -15859,7 +16193,8 @@ SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
// FADD -> FMA combines:
if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
- AddToWorklist(Fused.getNode());
+ if (Fused.getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(Fused.getNode());
return Fused;
}
return SDValue();
@@ -16051,7 +16386,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// FADD -> FMA combines:
if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
- AddToWorklist(Fused.getNode());
+ if (Fused.getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(Fused.getNode());
return Fused;
}
return SDValue();
@@ -16168,6 +16504,112 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
return SDValue();
}
+// Transform IEEE Floats:
+// (fmul C, (uitofp Pow2))
+// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
+// (fdiv C, (uitofp Pow2))
+// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
+//
+// The rationale is that fmul/fdiv by a power of 2 just changes the exponent,
+// so there is no need for more than an add/sub.
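+// For example, with 32-bit floats: bits(3.0f) = 0x40400000, and multiplying
+// by 4 adds Log2(4) << 23 = 0x01000000, giving 0x41400000 == bits(12.0f).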
+//
+// This is valid under the following circumstances:
+// 1) We are dealing with IEEE floats
+// 2) C is normal
+// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
+// TODO: Much of this could also be used for generating `ldexp` on targets
+// that prefer it.
+SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue ConstOp, Pow2Op;
+
+ std::optional<int> Mantissa;
+ auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
+ if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
+ return false;
+
+ ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
+ Pow2Op = N->getOperand(1 - ConstOpIdx);
+ if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
+ (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
+ !DAG.computeKnownBits(Pow2Op).isNonNegative()))
+ return false;
+
+ Pow2Op = Pow2Op.getOperand(0);
+
+ // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
+ // TODO: We could use knownbits to make this bound more precise.
+ int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
+
+ auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
+ if (CFP == nullptr)
+ return false;
+
+ const APFloat &APF = CFP->getValueAPF();
+
+    // Make sure we have a normal IEEE constant.
+    if (!APF.isNormal() || !APF.isIEEE())
+ return false;
+
+    // Make sure the float's exponent is within the bounds where this
+    // transform produces a bitwise-equal value.
+ int CurExp = ilogb(APF);
+    // FMul by pow2 will only increase the exponent.
+ int MinExp =
+ N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
+    // FDiv by pow2 will only decrease the exponent.
+ int MaxExp =
+ N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
+ if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
+ MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
+ return false;
+
+ // Finally make sure we actually know the mantissa for the float type.
+ int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
+ if (!Mantissa)
+ Mantissa = ThisMantissa;
+
+ return *Mantissa == ThisMantissa && ThisMantissa > 0;
+ };
+
+ // TODO: We may be able to include undefs.
+ return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
+ };
+
+ if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
+ return SDValue();
+
+ if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
+ return SDValue();
+
+ // Get log2 after all other checks have taken place. This is because
+ // BuildLogBase2 may create a new node.
+ SDLoc DL(N);
+  // Get a Log2 type with the same bitwidth as the float type (VT).
+ EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
+ if (VT.isVector())
+ NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
+ VT.getVectorElementCount());
+
+ SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
+ /*InexpensiveOnly*/ true, NewIntVT);
+ if (!Log2)
+ return SDValue();
+
+ // Perform actual transform.
+ SDValue MantissaShiftCnt =
+ DAG.getConstant(*Mantissa, DL, getShiftAmountTy(NewIntVT));
+  // TODO: Sometimes Log2 is of the form `(X + C)`. `(X + C) << C1` should
+  // fold to `(X << C1) + (C << C1)`, but that isn't always the case because
+  // of the cast. We could implement that fold here by handling the casts.
+ SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
+ SDValue ResAsInt =
+ DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
+ NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
+ SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
+ return ResAsFP;
+}
+
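+// A minimal standalone sketch (not part of this patch; `mulByPow2` is a
+// hypothetical helper) of the bit-level identity the combine above relies on,
+// assuming 32-bit IEEE floats, a normal constant C, and no exponent
+// overflow/underflow:
+//
+//   #include <bit>      // std::bit_cast, std::countr_zero (C++20)
+//   #include <cstdint>
+//   float mulByPow2(float C, uint32_t Pow2) { // Pow2 must be a power of 2
+//     uint32_t Bits = std::bit_cast<uint32_t>(C);
+//     Bits += std::countr_zero(Pow2) << 23u;  // 23 = float mantissa bits
+//     return std::bit_cast<float>(Bits);      // == C * (float)Pow2
+//   }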
SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -16308,6 +16750,11 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
return Fused;
}
+ // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
+ // able to run.
+ if (SDValue R = combineFMulOrFDivWithIntPow2(N))
+ return R;
+
return SDValue();
}
@@ -16438,6 +16885,21 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFMAD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // Constant fold FMAD.
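+  // Note: ISD::FMAD rounds the intermediate product (matching separate
+  // FMUL + FADD), and getNode folds all-constant operands accordingly.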
+ if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) &&
+ isa<ConstantFPSDNode>(N2))
+ return DAG.getNode(ISD::FMAD, DL, VT, N0, N1, N2);
+
+ return SDValue();
+}
+
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
@@ -16659,6 +17121,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
}
+ if (SDValue R = combineFMulOrFDivWithIntPow2(N))
+ return R;
+
return SDValue();
}
@@ -17046,6 +17511,21 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
return FoldIntToFPToInt(N, DAG);
}
+SDValue DAGCombiner::visitXRINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (lrint|llrint undef) -> undef
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
+ // fold (lrint|llrint c1fp) -> c1
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -17197,6 +17677,7 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
case ISD::FRINT:
case ISD::FTRUNC:
case ISD::FNEARBYINT:
+ case ISD::FROUNDEVEN:
case ISD::FFLOOR:
case ISD::FCEIL:
return N0;
@@ -17671,6 +18152,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// can be folded with this one. We should do this to avoid having to keep
// a copy of the original base pointer.
SmallVector<SDNode *, 16> OtherUses;
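+  // Cap the predecessor walk below so compile time stays bounded on very
+  // large DAGs.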
+ constexpr unsigned int MaxSteps = 8192;
if (isa<ConstantSDNode>(Offset))
for (SDNode::use_iterator UI = BasePtr->use_begin(),
UE = BasePtr->use_end();
@@ -17681,7 +18163,8 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
continue;
- if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
+ if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
+ MaxSteps))
continue;
if (Use.getUser()->getOpcode() != ISD::ADD &&
@@ -17714,7 +18197,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
for (SDNode *Use : Ptr->uses()) {
if (Use == N)
continue;
- if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
+ if (SDNode::hasPredecessorHelper(Use, Visited, Worklist, MaxSteps))
return false;
// If Ptr may be folded in addressing mode of other use, then it's
@@ -17888,12 +18371,13 @@ static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
// Check for #2.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 8> Worklist;
+ constexpr unsigned int MaxSteps = 8192;
// Ptr is predecessor to both N and Op.
Visited.insert(Ptr.getNode());
Worklist.push_back(N);
Worklist.push_back(Op);
- if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
- !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
+ if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
+ !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
return Op;
}
return nullptr;
@@ -18070,7 +18554,7 @@ StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
}
SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
- if (OptLevel == CodeGenOpt::None || !LD->isSimple())
+ if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
return SDValue();
SDValue Chain = LD->getOperand(0);
int64_t Offset;
@@ -18270,7 +18754,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
return V;
// Try to infer better alignment information than the load already has.
- if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
+ if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
+ !LD->isAtomic()) {
if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
if (*Alignment > LD->getAlign() &&
isAligned(*Alignment, LD->getSrcValueOffset())) {
@@ -19006,7 +19491,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
SDValue Ptr = St->getBasePtr();
if (StOffset) {
SDLoc DL(IVal);
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
}
++OpsNarrowed;
@@ -19132,7 +19617,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
return SDValue();
SDValue NewPtr =
- DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
SDValue NewLD =
DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
@@ -19305,7 +19790,7 @@ SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
Chains.push_back(StoreNodes[i].MemNode->getChain());
}
- assert(Chains.size() > 0 && "Chain should have generated a chain");
+ assert(!Chains.empty() && "Chain should have generated a chain");
return DAG.getTokenFactor(StoreDL, Chains);
}
@@ -19381,23 +19866,24 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
for (unsigned I = 0; I != NumStores; ++I) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
SDValue Val = St->getValue();
- // If constant is of the wrong type, convert it now.
+    // If the constant is of the wrong type, convert it now. This comes up
+    // when one of our stores was truncating.
if (MemVT != Val.getValueType()) {
Val = peekThroughBitcasts(Val);
// Deal with constants of wrong size.
if (ElementSizeBits != Val.getValueSizeInBits()) {
- EVT IntMemVT =
- EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
- if (isa<ConstantFPSDNode>(Val)) {
+ auto *C = dyn_cast<ConstantSDNode>(Val);
+ if (!C)
// Not clear how to truncate FP values.
+ // TODO: Handle truncation of build_vector constants
return false;
- }
- if (auto *C = dyn_cast<ConstantSDNode>(Val))
- Val = DAG.getConstant(C->getAPIntValue()
- .zextOrTrunc(Val.getValueSizeInBits())
- .zextOrTrunc(ElementSizeBits),
- SDLoc(C), IntMemVT);
+ EVT IntMemVT =
+ EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
+ Val = DAG.getConstant(C->getAPIntValue()
+ .zextOrTrunc(Val.getValueSizeInBits())
+ .zextOrTrunc(ElementSizeBits),
+ SDLoc(C), IntMemVT);
}
      // Make sure the correctly sized value is bitcast to the correct type.
Val = DAG.getBitcast(MemVT, Val);
@@ -19473,6 +19959,10 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
// If fp truncation is necessary give up for now.
if (MemVT.getSizeInBits() != ElementSizeBits)
return false;
+ } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
+ // Not yet handled
+ return false;
} else {
llvm_unreachable("Invalid constant element type");
}
@@ -19603,7 +20093,7 @@ void DAGCombiner::getStoreMergeCandidates(
case StoreSource::Constant:
if (NoTypeMatch)
return false;
- if (!isIntOrFPConstant(OtherBC))
+ if (getStoreSource(OtherBC) != StoreSource::Constant)
return false;
break;
case StoreSource::Extract:
@@ -19825,6 +20315,8 @@ bool DAGCombiner::tryStoreMergeOfConstants(
IsElementZero = C->isZero();
else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
IsElementZero = C->getConstantFPValue()->isNullValue();
+ else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
+ IsElementZero = true;
if (IsElementZero) {
if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
FirstZeroAfterNonZero = i;
@@ -20286,7 +20778,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
}
bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
- if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
+ if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
return false;
// TODO: Extend this function to merge stores of scalable vectors.
@@ -20448,8 +20940,8 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
Ptr, ST->getMemOperand());
}
- if (ST->isSimple() &&
- TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
+ !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
// Many FP stores are not made apparent until after legalize, e.g. for
// argument passing. Since this is so common, custom legalize the
// 64-bit integer store into two 32-bit stores.
@@ -20464,7 +20956,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
ST->getOriginalAlign(), MMOFlags, AAInfo);
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
ST->getOriginalAlign(), MMOFlags, AAInfo);
@@ -20492,9 +20984,11 @@ SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
SDValue Elt = Value.getOperand(1);
SDValue Idx = Value.getOperand(2);
- // If the element isn't byte sized then we can't compute an offset
+ // If the element isn't byte sized or is implicitly truncated then we can't
+ // compute an offset.
EVT EltVT = Elt.getValueType();
- if (!EltVT.isByteSized())
+ if (!EltVT.isByteSized() ||
+ EltVT != Value.getOperand(0).getValueType().getVectorElementType())
return SDValue();
auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
@@ -20515,7 +21009,7 @@ SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
EVT PtrVT = Ptr.getValueType();
SDValue Offset =
- DAG.getNode(ISD::MUL, DL, PtrVT, Idx,
+ DAG.getNode(ISD::MUL, DL, PtrVT, DAG.getZExtOrTrunc(Idx, DL, PtrVT),
DAG.getConstant(EltVT.getSizeInBits() / 8, DL, PtrVT));
SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, Offset);
MachinePointerInfo PointerInfo(ST->getAddressSpace());
@@ -20524,7 +21018,7 @@ SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
// info
if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
- NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(COffset), DL);
+ NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
}
@@ -20563,7 +21057,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return Chain;
// Try to infer better alignment information than the store already has.
- if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
+ if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
+ !ST->isAtomic()) {
if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
if (*Alignment > ST->getAlign() &&
isAligned(*Alignment, ST->getSrcValueOffset())) {
@@ -20679,7 +21174,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
if (ST->isUnindexed() && ST->isSimple() &&
ST1->isUnindexed() && ST1->isSimple()) {
- if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr &&
+ if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
ST->getAddressSpace() == ST1->getAddressSpace()) {
// If this is a store followed by a store with the same value to the
@@ -20687,7 +21182,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return Chain;
}
- if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
+ if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
!ST1->getBasePtr().isUndef() &&
ST->getAddressSpace() == ST1->getAddressSpace()) {
// If we consider two stores and one smaller in size is a scalable
@@ -20700,7 +21195,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
ST->getMemoryVT().getStoreSize())) {
CombineTo(ST1, ST1->getChain());
- return SDValue();
+ return SDValue(N, 0);
}
} else {
const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
@@ -20713,7 +21208,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
ChainBase,
ST1->getMemoryVT().getFixedSizeInBits())) {
CombineTo(ST1, ST1->getChain());
- return SDValue();
+ return SDValue(N, 0);
}
}
}
@@ -20850,7 +21345,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
/// }
///
SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
- if (OptLevel == CodeGenOpt::None)
+ if (OptLevel == CodeGenOptLevel::None)
return SDValue();
// Can't change the number of memory accesses for a volatile store or break
@@ -20920,7 +21415,8 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
// Lower value store.
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
ST->getOriginalAlign(), MMOFlags, AAInfo);
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
+ Ptr =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
// Higher value store.
SDValue St1 = DAG.getStore(
St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
@@ -21687,14 +22183,15 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (DAG.isKnownNeverZero(Index))
return DAG.getUNDEF(ScalarVT);
- // Check if the result type doesn't match the inserted element type. A
- // SCALAR_TO_VECTOR may truncate the inserted element and the
- // EXTRACT_VECTOR_ELT may widen the extracted vector.
+ // Check if the result type doesn't match the inserted element type.
+  // The inserted element and extracted element may have mismatched bitwidths.
+  // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted
+  // value.
SDValue InOp = VecOp.getOperand(0);
if (InOp.getValueType() != ScalarVT) {
- assert(InOp.getValueType().isInteger() && ScalarVT.isInteger() &&
- InOp.getValueType().bitsGT(ScalarVT));
- return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
+ assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
+ if (InOp.getValueType().bitsGT(ScalarVT))
+ return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
+ return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
}
return InOp;
}
@@ -21746,6 +22243,19 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
unsigned NumElts = VecVT.getVectorNumElements();
unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
+  // See if the extracted element is constant, in which case fold it if it's
+  // a legal fp immediate.
+ if (IndexC && ScalarVT.isFloatingPoint()) {
+ APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
+ KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
+ if (KnownElt.isConstant()) {
+ APFloat CstFP =
+ APFloat(DAG.EVTToAPFloatSemantics(ScalarVT), KnownElt.getConstant());
+ if (TLI.isFPImmLegal(CstFP, ScalarVT))
+ return DAG.getConstantFP(CstFP, DL, ScalarVT);
+ }
+ }
+
// TODO: These transforms should not require the 'hasOneUse' restriction, but
// there are regressions on multiple targets without it. We can end up with a
// mess of scalar and vector code if we reduce only part of the DAG to scalar.
@@ -22108,12 +22618,18 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
+ EVT VT = N->getValueType(0);
+
+ // Don't run this before LegalizeTypes if VT is legal.
+ // Targets may have other preferences.
+ if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
+ return SDValue();
+
// Only for little endian
if (!DAG.getDataLayout().isLittleEndian())
return SDValue();
SDLoc DL(N);
- EVT VT = N->getValueType(0);
EVT OutScalarTy = VT.getScalarType();
uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
@@ -23574,7 +24090,8 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
// Bail out if the target does not support a narrower version of the binop.
EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
WideNumElts / NarrowingRatio);
- if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
+ if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
+ LegalOperations))
return SDValue();
// If extraction is cheap, we don't need to look at the binop operands
@@ -23819,7 +24336,7 @@ static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
// Profitability check: only deal with extractions from the first subvector
// unless the mask becomes an identity mask.
- if (!ShuffleVectorInst::isIdentityMask(NewMask) ||
+ if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
any_of(NewMask, [](int M) { return M < 0; }))
for (auto &DemandedSubvector : DemandedSubvectors)
if (DemandedSubvector.second != 0)
@@ -25581,15 +26098,31 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
return N0;
// If this is an insert of an extracted vector into an undef vector, we can
- // just use the input to the extract.
+ // just use the input to the extract if the types match, and can simplify
+ // in some cases even if they don't.
if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
- return N1.getOperand(0);
+ N1.getOperand(1) == N2) {
+ EVT SrcVT = N1.getOperand(0).getValueType();
+ if (SrcVT == VT)
+ return N1.getOperand(0);
+      // TODO: To remove the zero check, we would need to adjust the offset
+      // to a multiple of the new src type.
+ if (isNullConstant(N2) &&
+ VT.isScalableVector() == SrcVT.isScalableVector()) {
+ if (VT.getVectorMinNumElements() >= SrcVT.getVectorMinNumElements())
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
+ VT, N0, N1.getOperand(0), N2);
+ else
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
+ VT, N1.getOperand(0), N2);
+ }
+ }
// Simplify scalar inserts into an undef vector:
// insert_subvector undef, (splat X), N2 -> splat X
if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
- return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
+ if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
+ return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
// If we are inserting a bitcast value into an undef, with the same
// number of elements, just use the bitcast input of the extract.
@@ -25633,10 +26166,11 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
N1, N2);
// Eliminate an intermediate insert into an undef vector:
- // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
- // insert_subvector undef, X, N2
+ // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
+ // insert_subvector undef, X, 0
if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
- N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
+ N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
+ isNullConstant(N2))
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
N1.getOperand(1), N2);
@@ -25812,6 +26346,14 @@ SDValue DAGCombiner::visitVPOp(SDNode *N) {
if (SDValue SD = visitVPSCATTER(N))
return SD;
+ if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
+ if (SDValue SD = visitVP_STRIDED_LOAD(N))
+ return SD;
+
+ if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
+ if (SDValue SD = visitVP_STRIDED_STORE(N))
+ return SD;
+
// VP operations in which all vector elements are disabled - either by
// determining that the mask is all false or that the EVL is 0 - can be
// eliminated.
@@ -26533,11 +27075,11 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
- EVT VT = N->getValueType(0);
SDLoc DL(N);
unsigned BinOpc = N1.getOpcode();
- if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
+ if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
+ (N1.getResNo() != N2.getResNo()))
return SDValue();
// The use checks are intentionally on SDNode because we may be dealing
@@ -26554,26 +27096,29 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
// Fold select(cond, binop(x, y), binop(z, y))
// --> binop(select(cond, x, z), y)
if (N1.getOperand(1) == N2.getOperand(1)) {
- SDValue NewSel =
- DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
+ SDValue N10 = N1.getOperand(0);
+ SDValue N20 = N2.getOperand(0);
+ SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
NewBinOp->setFlags(N1->getFlags());
NewBinOp->intersectFlagsWith(N2->getFlags());
- return NewBinOp;
+ return SDValue(NewBinOp.getNode(), N1.getResNo());
}
// Fold select(cond, binop(x, y), binop(x, z))
// --> binop(x, select(cond, y, z))
- // Second op VT might be different (e.g. shift amount type)
- if (N1.getOperand(0) == N2.getOperand(0) &&
- VT == N1.getOperand(1).getValueType() &&
- VT == N2.getOperand(1).getValueType()) {
- SDValue NewSel =
- DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
- SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
- NewBinOp->setFlags(N1->getFlags());
- NewBinOp->intersectFlagsWith(N2->getFlags());
- return NewBinOp;
+ if (N1.getOperand(0) == N2.getOperand(0)) {
+ SDValue N11 = N1.getOperand(1);
+ SDValue N21 = N2.getOperand(1);
+ // Second op VT might be different (e.g. shift amount type)
+ if (N11.getValueType() == N21.getValueType()) {
+ SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
+ SDValue NewBinOp =
+ DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
+ NewBinOp->setFlags(N1->getFlags());
+ NewBinOp->intersectFlagsWith(N2->getFlags());
+ return SDValue(NewBinOp.getNode(), N1.getResNo());
+ }
}
// TODO: Handle isCommutativeBinOp patterns as well?
@@ -26722,8 +27267,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
// Shift the tested bit over the sign bit.
const APInt &AndMask = ConstAndRHS->getAPIntValue();
- unsigned ShCt = AndMask.getBitWidth() - 1;
- if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
+ if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
+ unsigned ShCt = AndMask.getBitWidth() - 1;
SDValue ShlAmt =
DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS),
getShiftAmountTy(AndLHS.getValueType()));
@@ -26764,10 +27309,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
// zext (setcc n0, n1)
if (LegalTypes) {
SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
- if (VT.bitsLT(SCC.getValueType()))
- Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
- else
- Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
+ Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
} else {
SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
@@ -26937,10 +27479,129 @@ SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
return SDValue();
}
+// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
+//
+// Returns the node that represents `Log2(Op)`. This may create a new node. If
+// we are unable to compute `Log2(Op)`, it returns `SDValue()`.
+//
+// All nodes will be created at `DL` and the output will be of type `VT`.
+//
+// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
+// `AssumeNonZero` if this function should simply assume (rather than prove)
+// that `Op` is non-zero.
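+//
+// For example, log2(1 << Y) becomes 0 + Y, and log2(C ? 8 : 16) becomes
+// C ? 3 : 4, without materializing a CTLZ.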
+static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
+ SDValue Op, unsigned Depth,
+ bool AssumeNonZero) {
+ assert(VT.isInteger() && "Only integer types are supported!");
+
+ auto PeekThroughCastsAndTrunc = [](SDValue V) {
+ while (true) {
+ switch (V.getOpcode()) {
+ case ISD::TRUNCATE:
+ case ISD::ZERO_EXTEND:
+ V = V.getOperand(0);
+ break;
+ default:
+ return V;
+ }
+ }
+ };
+
+ if (VT.isScalableVector())
+ return SDValue();
+
+ Op = PeekThroughCastsAndTrunc(Op);
+
+  // Helper for determining whether a value is a power-of-2 constant scalar or
+  // a vector of such elements.
+ SmallVector<APInt> Pow2Constants;
+ auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
+ if (C->isZero() || C->isOpaque())
+ return false;
+ // TODO: We may also be able to support negative powers of 2 here.
+ if (C->getAPIntValue().isPowerOf2()) {
+ Pow2Constants.emplace_back(C->getAPIntValue());
+ return true;
+ }
+ return false;
+ };
+
+ if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
+ if (!VT.isVector())
+ return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
+ // We need to create a build vector
+ SmallVector<SDValue> Log2Ops;
+ for (const APInt &Pow2 : Pow2Constants)
+ Log2Ops.emplace_back(
+ DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
+ return DAG.getBuildVector(VT, DL, Log2Ops);
+ }
+
+ if (Depth >= DAG.MaxRecursionDepth)
+ return SDValue();
+
+ auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
+ ToCast = PeekThroughCastsAndTrunc(ToCast);
+ EVT CurVT = ToCast.getValueType();
+ if (NewVT == CurVT)
+ return ToCast;
+
+ if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
+ return DAG.getBitcast(NewVT, ToCast);
+
+ return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
+ };
+
+ // log2(X << Y) -> log2(X) + Y
+ if (Op.getOpcode() == ISD::SHL) {
+ // 1 << Y and X nuw/nsw << Y are all non-zero.
+ if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
+ Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
+ if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
+ Depth + 1, AssumeNonZero))
+ return DAG.getNode(ISD::ADD, DL, VT, LogX,
+ CastToVT(VT, Op.getOperand(1)));
+ }
+
+  // log2(c ? X : Y) -> c ? log2(X) : log2(Y)
+ if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
+ Op.hasOneUse()) {
+ if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
+ Depth + 1, AssumeNonZero))
+ if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
+ Depth + 1, AssumeNonZero))
+ return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
+ }
+
+ // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
+ // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
+ if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
+ Op.hasOneUse()) {
+    // Pass AssumeNonZero as false here. Otherwise we can hit a case where
+    // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
+ if (SDValue LogX =
+ takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
+ /*AssumeNonZero*/ false))
+ if (SDValue LogY =
+ takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
+ /*AssumeNonZero*/ false))
+ return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
+ }
+
+ return SDValue();
+}
+
/// Determines the LogBase2 value for a non-null input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
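+/// For example, for V = 8 in i32: ctlz(8) = 28, so LogBase2(8) = 31 - 28 = 3.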
-SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
- EVT VT = V.getValueType();
+SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
+ bool KnownNonZero, bool InexpensiveOnly,
+ std::optional<EVT> OutVT) {
+ EVT VT = OutVT ? *OutVT : V.getValueType();
+ SDValue InexpensiveLogBase2 =
+ takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
+ if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
+ return InexpensiveLogBase2;
+
SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
@@ -27328,7 +27989,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
}
case ISD::CopyFromReg:
- // Always forward past past CopyFromReg.
+ // Always forward past CopyFromReg.
C = C.getOperand(0);
return true;
@@ -27400,7 +28061,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
/// (aliasing node.)
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
- if (OptLevel == CodeGenOpt::None)
+ if (OptLevel == CodeGenOptLevel::None)
return OldChain;
// Ops for replacing token factor.
@@ -27410,7 +28071,7 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
GatherAllAliases(N, OldChain, Aliases);
// If no operands then chain to entry token.
- if (Aliases.size() == 0)
+ if (Aliases.empty())
return DAG.getEntryNode();
// If a single operand then chain to it. We don't need to revisit it.
@@ -27506,7 +28167,7 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
}
// If we didn't find a chained store, exit.
- if (ChainedStores.size() == 0)
+ if (ChainedStores.empty())
return false;
// Improve all chained stores (St and ChainedStores members) starting from
@@ -27557,7 +28218,7 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
}
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
- if (OptLevel == CodeGenOpt::None)
+ if (OptLevel == CodeGenOptLevel::None)
return false;
const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
@@ -27585,7 +28246,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
/// This is the entry point for the file.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
/// This is the main entry point to this class.
DAGCombiner(*this, AA, OptLevel).Run(Level);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index f0affce7b6b8..a83129586339 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1327,6 +1327,14 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
<< *DI << "\n");
return true;
}
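+      // If the address is a static alloca, lower the dbg intrinsic to a
+      // frame-index operand rather than requiring the value in a register.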
+ if (auto SI = FuncInfo.StaticAllocaMap.find(dyn_cast<AllocaInst>(V));
+ SI != FuncInfo.StaticAllocaMap.end()) {
+ MachineOperand FrameIndexOp = MachineOperand::CreateFI(SI->second);
+ bool IsIndirect = false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II, IsIndirect,
+ FrameIndexOp, Var, Expr);
+ return true;
+ }
if (Register Reg = lookUpRegForValue(V)) {
// FIXME: This does not handle register-indirect values at offset 0.
if (!FuncInfo.MF->useDebugInstrRef()) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 1d0a03ccfcdc..1128ecfd860d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -64,11 +64,18 @@ static ISD::NodeType getPreferredExtendForValue(const Instruction *I) {
// can be exposed.
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
unsigned NumOfSigned = 0, NumOfUnsigned = 0;
- for (const User *U : I->users()) {
- if (const auto *CI = dyn_cast<CmpInst>(U)) {
+ for (const Use &U : I->uses()) {
+ if (const auto *CI = dyn_cast<CmpInst>(U.getUser())) {
NumOfSigned += CI->isSigned();
NumOfUnsigned += CI->isUnsigned();
}
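+    // A use as a zeroext/signext call argument also votes for the
+    // corresponding extension kind, e.g. passing the value as
+    // 'i32 signext %v' counts like a signed compare.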
+ if (const auto *CallI = dyn_cast<CallBase>(U.getUser())) {
+ if (!CallI->isArgOperand(&U))
+ continue;
+ unsigned ArgNo = CallI->getArgOperandNo(&U);
+ NumOfUnsigned += CallI->paramHasAttr(ArgNo, Attribute::ZExt);
+ NumOfSigned += CallI->paramHasAttr(ArgNo, Attribute::SExt);
+ }
}
if (NumOfSigned > NumOfUnsigned)
ExtendKind = ISD::SIGN_EXTEND;
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 4e7895c0b3cf..a27febe15db8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -1311,15 +1311,15 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags =
cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
- const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ const InlineAsm::Flag F(Flags);
+ const unsigned NumVals = F.getNumOperandRegisters();
GroupIdx.push_back(MIB->getNumOperands());
MIB.addImm(Flags);
++i; // Skip the ID value.
- switch (InlineAsm::getKind(Flags)) {
- default: llvm_unreachable("Bad flags!");
- case InlineAsm::Kind_RegDef:
+ switch (F.getKind()) {
+ case InlineAsm::Kind::RegDef:
for (unsigned j = 0; j != NumVals; ++j, ++i) {
Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
// FIXME: Add dead flags for physical and virtual registers defined.
@@ -1328,8 +1328,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
MIB.addReg(Reg, RegState::Define | getImplRegState(Reg.isPhysical()));
}
break;
- case InlineAsm::Kind_RegDefEarlyClobber:
- case InlineAsm::Kind_Clobber:
+ case InlineAsm::Kind::RegDefEarlyClobber:
+ case InlineAsm::Kind::Clobber:
for (unsigned j = 0; j != NumVals; ++j, ++i) {
Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber |
@@ -1337,9 +1337,9 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
ECRegs.push_back(Reg);
}
break;
- case InlineAsm::Kind_RegUse: // Use of register.
- case InlineAsm::Kind_Imm: // Immediate.
- case InlineAsm::Kind_Mem: // Non-function addressing mode.
+ case InlineAsm::Kind::RegUse: // Use of register.
+ case InlineAsm::Kind::Imm: // Immediate.
+ case InlineAsm::Kind::Mem: // Non-function addressing mode.
// The addressing mode has been selected, just add all of the
// operands to the machine instruction.
for (unsigned j = 0; j != NumVals; ++j, ++i)
@@ -1347,9 +1347,9 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
/*IsDebug=*/false, IsClone, IsCloned);
// Manually set isTied bits.
- if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) {
- unsigned DefGroup = 0;
- if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) {
+ if (F.isRegUseKind()) {
+ unsigned DefGroup;
+ if (F.isUseOperandTiedToDef(DefGroup)) {
unsigned DefIdx = GroupIdx[DefGroup] + 1;
unsigned UseIdx = GroupIdx.back() + 1;
for (unsigned j = 0; j != NumVals; ++j)
@@ -1357,7 +1357,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
}
}
break;
- case InlineAsm::Kind_Func: // Function addressing mode.
+ case InlineAsm::Kind::Func: // Function addressing mode.
for (unsigned j = 0; j != NumVals; ++j, ++i) {
SDValue Op = Node->getOperand(i);
AddOperand(MIB, Op, 0, nullptr, VRBaseMap,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 61fc31715d71..5e1f9fbcdde0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -324,7 +325,8 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) &&
TLI.ShouldShrinkFPConstant(OrigVT)) {
Type *SType = SVT.getTypeForEVT(*DAG.getContext());
- LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
+ LLVMC = cast<ConstantFP>(ConstantFoldCastOperand(
+ Instruction::FPTrunc, LLVMC, SType, DAG.getDataLayout()));
VT = SVT;
Extend = true;
}
@@ -459,7 +461,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
ST->getOriginalAlign(), MMOFlags, AAInfo);
}
- if (CFP->getValueType(0) == MVT::f64) {
+ if (CFP->getValueType(0) == MVT::f64 &&
+ !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
// If this target supports 64-bit registers, do a single 64-bit store.
if (TLI.isTypeLegal(MVT::i64)) {
SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
@@ -480,7 +483,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(),
ST->getOriginalAlign(), MMOFlags, AAInfo);
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), dl);
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
ST->getOriginalAlign(), MMOFlags, AAInfo);
@@ -589,7 +592,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
+ Ptr =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(IncrementSize), dl);
Hi = DAG.getNode(
ISD::SRL, dl, Value.getValueType(), Value,
DAG.getConstant(RoundWidth, dl,
@@ -802,7 +806,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
+ Ptr =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(IncrementSize), dl);
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
@@ -830,7 +835,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
+ Ptr =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(IncrementSize), dl);
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
@@ -1007,6 +1013,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
break;
case ISD::SET_FPENV:
+ case ISD::SET_FPMODE:
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(1).getValueType());
break;
@@ -1042,7 +1049,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
case ISD::ATOMIC_STORE:
Action = TLI.getOperationAction(Node->getOpcode(),
- Node->getOperand(2).getValueType());
+ Node->getOperand(1).getValueType());
break;
case ISD::SELECT_CC:
case ISD::STRICT_FSETCC:
@@ -1518,7 +1525,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
unsigned Offset = TypeByteSize*i;
- SDValue Idx = DAG.getMemBasePlusOffset(FIPtr, TypeSize::Fixed(Offset), dl);
+ SDValue Idx =
+ DAG.getMemBasePlusOffset(FIPtr, TypeSize::getFixed(Offset), dl);
if (Truncate)
Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
@@ -1580,7 +1588,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
// Advance the pointer so that the loaded byte will contain the sign bit.
unsigned ByteOffset = (NumBits / 8) - 1;
IntPtr =
- DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(ByteOffset), DL);
+ DAG.getMemBasePlusOffset(StackPtr, TypeSize::getFixed(ByteOffset), DL);
State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI,
ByteOffset);
}
@@ -2250,7 +2258,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
// Also pass the return address of the remainder.
SDValue FIPtr = DAG.CreateStackTemporary(RetVT);
Entry.Node = FIPtr;
- Entry.Ty = RetTy->getPointerTo();
+ Entry.Ty = PointerType::getUnqual(RetTy->getContext());
Entry.IsSExt = isSigned;
Entry.IsZExt = !isSigned;
Args.push_back(Entry);
@@ -2341,7 +2349,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
// Pass the return address of sin.
SDValue SinPtr = DAG.CreateStackTemporary(RetVT);
Entry.Node = SinPtr;
- Entry.Ty = RetTy->getPointerTo();
+ Entry.Ty = PointerType::getUnqual(RetTy->getContext());
Entry.IsSExt = false;
Entry.IsZExt = false;
Args.push_back(Entry);
@@ -2349,7 +2357,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
// Also pass the return address of the cos.
SDValue CosPtr = DAG.CreateStackTemporary(RetVT);
Entry.Node = CosPtr;
- Entry.Ty = RetTy->getPointerTo();
+ Entry.Ty = PointerType::getUnqual(RetTy->getContext());
Entry.IsSExt = false;
Entry.IsZExt = false;
Args.push_back(Entry);
@@ -2649,7 +2657,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
SDValue Store1 = DAG.getStore(MemChain, dl, Lo, StackSlot,
MachinePointerInfo());
// Store the hi of the constructed double.
- SDValue HiPtr = DAG.getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), dl);
+ SDValue HiPtr =
+ DAG.getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), dl);
SDValue Store2 =
DAG.getStore(MemChain, dl, Hi, HiPtr, MachinePointerInfo());
MemChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
@@ -3079,11 +3088,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
case ISD::ATOMIC_STORE: {
// There is no libcall for atomic store; fake it with ATOMIC_SWAP.
- SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
- cast<AtomicSDNode>(Node)->getMemoryVT(),
- Node->getOperand(0),
- Node->getOperand(1), Node->getOperand(2),
- cast<AtomicSDNode>(Node)->getMemOperand());
+ SDValue Swap = DAG.getAtomic(
+ ISD::ATOMIC_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0), Node->getOperand(2), Node->getOperand(1),
+ cast<AtomicSDNode>(Node)->getMemOperand());
Results.push_back(Swap.getValue(1));
break;
}
@@ -3133,6 +3141,23 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Res.getValue(1));
break;
}
+ case ISD::ATOMIC_LOAD_SUB: {
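+    // Expand atomic subtraction as an atomic addition of the negated
+    // operand: atomicrmw sub p, x  ->  atomicrmw add p, (0 - x).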
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue RHS = Node->getOperand(2);
+ AtomicSDNode *AN = cast<AtomicSDNode>(Node);
+ if (RHS->getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(RHS->getOperand(1))->getVT() == AN->getMemoryVT())
+ RHS = RHS->getOperand(0);
+ SDValue NewRHS =
+ DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
+ SDValue Res = DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, AN->getMemoryVT(),
+ Node->getOperand(0), Node->getOperand(1),
+ NewRHS, AN->getMemOperand());
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ break;
+ }
case ISD::DYNAMIC_STACKALLOC:
ExpandDYNAMIC_STACKALLOC(Node, Results);
break;
@@ -3333,7 +3358,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(DAG.expandVACopy(Node));
break;
case ISD::EXTRACT_VECTOR_ELT:
- if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
+ if (Node->getOperand(0).getValueType().getVectorElementCount().isScalar())
// This must be an access of the only element. Return it.
Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0),
Node->getOperand(0));
@@ -3904,6 +3929,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue Chain = Node->getOperand(0);
SDValue Table = Node->getOperand(1);
SDValue Index = Node->getOperand(2);
+ int JTI = cast<JumpTableSDNode>(Table.getNode())->getIndex();
const DataLayout &TD = DAG.getDataLayout();
EVT PTy = TLI.getPointerTy(TD);
@@ -3938,7 +3964,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TLI.getPICJumpTableRelocBase(Table, DAG));
}
- Tmp1 = TLI.expandIndirectJTBranch(dl, LD.getValue(1), Addr, DAG);
+ Tmp1 = TLI.expandIndirectJTBranch(dl, LD.getValue(1), Addr, JTI, DAG);
Results.push_back(Tmp1);
break;
}
@@ -4418,6 +4444,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, RTLIB::EXP2_F80,
RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128, Results);
break;
+ case ISD::FEXP10:
+ ExpandFPLibCall(Node, RTLIB::EXP10_F32, RTLIB::EXP10_F64, RTLIB::EXP10_F80,
+ RTLIB::EXP10_F128, RTLIB::EXP10_PPCF128, Results);
+ break;
case ISD::FTRUNC:
case ISD::STRICT_FTRUNC:
ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
@@ -4820,6 +4850,46 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
DAG.makeStateFunctionCall(RTLIB::FESETENV, EnvPtr, Chain, dl));
break;
}
+ case ISD::GET_FPMODE: {
+ // Call fegetmode, which saves control modes into a stack slot. Then load
+ // the value to return from the stack.
+ EVT ModeVT = Node->getValueType(0);
+ SDValue StackPtr = DAG.CreateStackTemporary(ModeVT);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ SDValue Chain = DAG.makeStateFunctionCall(RTLIB::FEGETMODE, StackPtr,
+ Node->getOperand(0), dl);
+ SDValue LdInst = DAG.getLoad(
+ ModeVT, dl, Chain, StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));
+ Results.push_back(LdInst);
+ Results.push_back(LdInst.getValue(1));
+ break;
+ }
+ case ISD::SET_FPMODE: {
+  // Move control modes to a stack slot and then call fesetmode with a pointer
+  // to the slot as the argument.
+ SDValue Mode = Node->getOperand(1);
+ EVT ModeVT = Mode.getValueType();
+ SDValue StackPtr = DAG.CreateStackTemporary(ModeVT);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ SDValue StInst = DAG.getStore(
+ Node->getOperand(0), dl, Mode, StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));
+ Results.push_back(
+ DAG.makeStateFunctionCall(RTLIB::FESETMODE, StackPtr, StInst, dl));
+ break;
+ }
+ case ISD::RESET_FPMODE: {
+  // It is legalized to a call to 'fesetmode(FE_DFL_MODE)'. In glibc,
+  // FE_DFL_MODE is defined as '((const femode_t *) -1)'; targets where this
+  // does not hold must provide custom lowering.
+ const DataLayout &DL = DAG.getDataLayout();
+ EVT PtrTy = TLI.getPointerTy(DL);
+ SDValue Mode = DAG.getConstant(-1LL, dl, PtrTy);
+ Results.push_back(DAG.makeStateFunctionCall(RTLIB::FESETMODE, Mode,
+ Node->getOperand(0), dl));
+ break;
+ }
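+  // A sketch (not emitted code) of what the three expansions above
+  // correspond to at the C level, using glibc's femode_t API:
+  //   femode_t M;
+  //   fegetmode(&M);           // GET_FPMODE
+  //   fesetmode(&M);           // SET_FPMODE
+  //   fesetmode(FE_DFL_MODE);  // RESET_FPMODE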
}
// Replace the original node with the legalized result.
@@ -4961,6 +5031,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::SREM:
case ISD::UDIV:
case ISD::UREM:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
@@ -4977,12 +5051,21 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
break;
case ISD::SDIV:
case ISD::SREM:
+ case ISD::SMIN:
+ case ISD::SMAX:
ExtOp = ISD::SIGN_EXTEND;
break;
case ISD::UDIV:
case ISD::UREM:
ExtOp = ISD::ZERO_EXTEND;
break;
+ case ISD::UMIN:
+ case ISD::UMAX:
+ if (TLI.isSExtCheaperThanZExt(OVT, NVT))
+ ExtOp = ISD::SIGN_EXTEND;
+ else
+ ExtOp = ISD::ZERO_EXTEND;
+ break;
}
TruncOp = ISD::TRUNCATE;
}
@@ -5104,7 +5187,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
unsigned ExtOp = ISD::FP_EXTEND;
if (NVT.isInteger()) {
ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
- ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ if (isSignedIntSetCC(CCCode) ||
+ TLI.isSExtCheaperThanZExt(Node->getOperand(0).getValueType(), NVT))
+ ExtOp = ISD::SIGN_EXTEND;
+ else
+ ExtOp = ISD::ZERO_EXTEND;
}
if (Node->isStrictFPOpcode()) {
SDValue InChain = Node->getOperand(0);
@@ -5261,6 +5348,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FABS:
case ISD::FEXP:
case ISD::FEXP2:
+ case ISD::FEXP10:
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
Results.push_back(
@@ -5459,6 +5547,23 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(NewAtomic.getValue(1));
break;
}
+ case ISD::SPLAT_VECTOR: {
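+    // Promote by widening the scalar, splatting at the promoted vector type,
+    // and narrowing the result back to OVT.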
+ SDValue Scalar = Node->getOperand(0);
+ MVT ScalarType = Scalar.getSimpleValueType();
+ MVT NewScalarType = NVT.getVectorElementType();
+ if (ScalarType.isInteger()) {
+ Tmp1 = DAG.getNode(ISD::ANY_EXTEND, dl, NewScalarType, Scalar);
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp2));
+ break;
+ }
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NewScalarType, Scalar);
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
+ break;
+ }
}
// Replace the original node with the legalized result.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 7e035d21ef71..c4605a6b9598 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -49,8 +49,7 @@ static RTLIB::Libcall GetFPLibCall(EVT VT,
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
- LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG));
SDValue R = SDValue();
switch (N->getOpcode()) {
@@ -88,6 +87,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break;
case ISD::STRICT_FEXP2:
case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break;
+ case ISD::FEXP10: R = SoftenFloatRes_FEXP10(N); break;
case ISD::STRICT_FFLOOR:
case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break;
case ISD::STRICT_FLOG:
@@ -414,6 +414,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
RTLIB::EXP2_PPCF128));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP10(SDNode *N) {
+ return SoftenFloatRes_Unary(
+ N,
+ GetFPLibCall(N->getValueType(0), RTLIB::EXP10_F32, RTLIB::EXP10_F64,
+ RTLIB::EXP10_F80, RTLIB::EXP10_F128, RTLIB::EXP10_PPCF128));
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
RTLIB::FLOOR_F32,
@@ -890,8 +897,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VECREDUCE_SEQ(SDNode *N) {
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
- LLVM_DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG));
SDValue Res = SDValue();
switch (N->getOpcode()) {
@@ -1257,7 +1263,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) {
/// have invalid operands or may have other results that need promotion, we just
/// know that (at least) one result needs expansion.
void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
- LLVM_DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG));
SDValue Lo, Hi;
Lo = Hi = SDValue();
@@ -1305,6 +1311,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break;
case ISD::STRICT_FEXP2:
case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break;
+ case ISD::FEXP10: ExpandFloatRes_FEXP10(N, Lo, Hi); break;
case ISD::STRICT_FFLOOR:
case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break;
case ISD::STRICT_FLOG:
@@ -1500,6 +1507,15 @@ void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
RTLIB::EXP2_PPCF128), Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FEXP10(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Unary(N,
+ GetFPLibCall(N->getValueType(0), RTLIB::EXP10_F32,
+ RTLIB::EXP10_F64, RTLIB::EXP10_F80,
+ RTLIB::EXP10_F128, RTLIB::EXP10_PPCF128),
+ Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N,
SDValue &Lo, SDValue &Hi) {
ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
@@ -1852,7 +1868,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
/// types of the node are known to be legal, but other operands of the node may
/// need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
- LLVM_DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG));
SDValue Res = SDValue();
// See if the target wants to custom expand this node.
@@ -2166,8 +2182,7 @@ static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) {
}
bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
- LLVM_DEBUG(dbgs() << "Promote float operand " << OpNo << ": "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Promote float operand " << OpNo << ": "; N->dump(&DAG));
SDValue R = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
@@ -2180,6 +2195,7 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
// to use the promoted float operand. Nodes that produce at least one
// promotion-requiring floating point result have their operands legalized as
// a part of PromoteFloatResult.
+ // clang-format off
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
@@ -2191,7 +2207,9 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::BITCAST: R = PromoteFloatOp_BITCAST(N, OpNo); break;
case ISD::FCOPYSIGN: R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break;
case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT: R = PromoteFloatOp_FP_TO_XINT(N, OpNo); break;
+ case ISD::FP_TO_UINT:
+ case ISD::LRINT:
+ case ISD::LLRINT: R = PromoteFloatOp_UnaryOp(N, OpNo); break;
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
R = PromoteFloatOp_FP_TO_XINT_SAT(N, OpNo); break;
@@ -2200,6 +2218,7 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break;
case ISD::STORE: R = PromoteFloatOp_STORE(N, OpNo); break;
}
+ // clang-format on
if (R.getNode())
ReplaceValueWith(SDValue(N, 0), R);
@@ -2233,7 +2252,7 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo) {
}
// Convert the promoted float value to the desired integer type
-SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo) {
+SDValue DAGTypeLegalizer::PromoteFloatOp_UnaryOp(SDNode *N, unsigned OpNo) {
SDValue Op = GetPromotedFloat(N->getOperand(0));
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op);
}
@@ -2305,8 +2324,7 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) {
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
- LLVM_DEBUG(dbgs() << "Promote float result " << ResNo << ": "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Promote float result " << ResNo << ": "; N->dump(&DAG));
SDValue R = SDValue();
// See if the target wants to custom expand this node.
@@ -2340,6 +2358,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FCOS:
case ISD::FEXP:
case ISD::FEXP2:
+ case ISD::FEXP10:
case ISD::FFLOOR:
case ISD::FLOG:
case ISD::FLOG2:
@@ -2688,7 +2707,7 @@ SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) {
void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
LLVM_DEBUG(dbgs() << "Soft promote half result " << ResNo << ": ";
- N->dump(&DAG); dbgs() << "\n");
+ N->dump(&DAG));
SDValue R = SDValue();
// See if the target wants to custom expand this node.
@@ -2721,6 +2740,7 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::FCOS:
case ISD::FEXP:
case ISD::FEXP2:
+ case ISD::FEXP10:
case ISD::FFLOOR:
case ISD::FLOG:
case ISD::FLOG2:
@@ -2754,6 +2774,8 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::FPOWI:
case ISD::FLDEXP: R = SoftPromoteHalfRes_ExpOp(N); break;
+ case ISD::FFREXP: R = SoftPromoteHalfRes_FFREXP(N); break;
+
case ISD::LOAD: R = SoftPromoteHalfRes_LOAD(N); break;
case ISD::SELECT: R = SoftPromoteHalfRes_SELECT(N); break;
case ISD::SELECT_CC: R = SoftPromoteHalfRes_SELECT_CC(N); break;
@@ -2882,6 +2904,24 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_ExpOp(SDNode *N) {
return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
}
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FFREXP(SDNode *N) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+ SDValue Op = GetSoftPromotedHalf(N->getOperand(0));
+ SDLoc dl(N);
+
+ // Promote to the larger FP type.
+ Op = DAG.getNode(GetPromotionOpcode(OVT, NVT), dl, NVT, Op);
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl,
+ DAG.getVTList(NVT, N->getValueType(1)), Op);
+
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+
+ // Convert back to FP16 as an integer.
+ return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
+}
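
For intuition, here is a minimal scalar sketch of what this soft-promoted FFREXP computes, assuming the Clang/GCC _Float16 extension and std::frexp; the helper is invented for illustration and is not part of the patch (the in-tree path keeps the f16 payload in an i16 and only narrows the fraction result):

    #include <cmath>

    // Hypothetical scalar model: promote the half input to float, run frexp
    // there, and narrow only the fraction. The integer exponent needs no
    // narrowing, which is why the patch forwards Res.getValue(1) unchanged.
    _Float16 frexpHalfModel(_Float16 X, int &Exp) {
      float Promoted = static_cast<float>(X);   // f16 -> f32 promotion.
      float Frac = std::frexp(Promoted, &Exp);  // FFREXP on the wider type.
      return static_cast<_Float16>(Frac);       // Fraction narrowed back to f16.
    }
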
+
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) {
EVT RVT = N->getValueType(0);
EVT SVT = N->getOperand(0).getValueType();
@@ -2996,7 +3036,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_VECREDUCE_SEQ(SDNode *N) {
bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
LLVM_DEBUG(dbgs() << "Soft promote half operand " << OpNo << ": ";
- N->dump(&DAG); dbgs() << "\n");
+ N->dump(&DAG));
SDValue Res = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index df5878fcdf2e..362fa92dd44b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -39,8 +39,7 @@ using namespace llvm;
/// may also have invalid operands or may have other results that need
/// expansion, we just know that (at least) one result needs promotion.
void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
- LLVM_DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG));
SDValue Res = SDValue();
// See if the target wants to custom expand this node.
@@ -60,14 +59,21 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break;
+ case ISD::VP_BITREVERSE:
case ISD::BITREVERSE: Res = PromoteIntRes_BITREVERSE(N); break;
+ case ISD::VP_BSWAP:
case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
+ case ISD::VP_CTLZ_ZERO_UNDEF:
+ case ISD::VP_CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
case ISD::PARITY:
+ case ISD::VP_CTPOP:
case ISD::CTPOP: Res = PromoteIntRes_CTPOP_PARITY(N); break;
+ case ISD::VP_CTTZ_ZERO_UNDEF:
+ case ISD::VP_CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
case ISD::EXTRACT_VECTOR_ELT:
@@ -283,12 +289,22 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
Res = PromoteIntRes_FunnelShift(N);
break;
+ case ISD::VP_FSHL:
+ case ISD::VP_FSHR:
+ Res = PromoteIntRes_VPFunnelShift(N);
+ break;
+
case ISD::IS_FPCLASS:
Res = PromoteIntRes_IS_FPCLASS(N);
break;
case ISD::FFREXP:
Res = PromoteIntRes_FFREXP(N);
break;
+
+ case ISD::LRINT:
+ case ISD::LLRINT:
+ Res = PromoteIntRes_XRINT(N);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -359,7 +375,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N,
N->getMemOperand());
ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
ReplaceValueWith(SDValue(N, 2), Res.getValue(2));
- return Res.getValue(1);
+ return DAG.getSExtOrTrunc(Res.getValue(1), SDLoc(N), NVT);
}
// Op2 is used for the comparison and thus must be extended according to the
@@ -516,8 +532,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
- DAG.getShiftAmountConstant(DiffBits, NVT, dl));
+ SDValue ShAmt = DAG.getShiftAmountConstant(DiffBits, NVT, dl);
+ if (N->getOpcode() == ISD::BSWAP)
+ return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+ ShAmt);
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+ return DAG.getNode(ISD::VP_LSHR, dl, NVT,
+ DAG.getNode(ISD::VP_BSWAP, dl, NVT, Op, Mask, EVL), ShAmt,
+ Mask, EVL);
}
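
As a sanity check on the SRL above: byte-swapping a narrow value inside a wider register leaves the interesting bytes in the high part, so the result must be shifted right by the width difference. A scalar sketch, using C++23 std::byteswap and an invented helper name:

    #include <bit>
    #include <cstdint>

    // bswap of an i16 done in an i32, as the promoted path does: the swapped
    // bytes land in bits [31:16], so shift right by DiffBits = 32 - 16.
    uint16_t bswap16ViaU32(uint16_t X) {
      uint32_t Swapped = std::byteswap(static_cast<uint32_t>(X));
      return static_cast<uint16_t>(Swapped >> 16);
    }
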
SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
@@ -537,9 +560,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- return DAG.getNode(ISD::SRL, dl, NVT,
- DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
- DAG.getShiftAmountConstant(DiffBits, NVT, dl));
+ SDValue ShAmt = DAG.getShiftAmountConstant(DiffBits, NVT, dl);
+ if (N->getOpcode() == ISD::BITREVERSE)
+ return DAG.getNode(ISD::SRL, dl, NVT,
+ DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), ShAmt);
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+ return DAG.getNode(ISD::VP_LSHR, dl, NVT,
+ DAG.getNode(ISD::VP_BITREVERSE, dl, NVT, Op, Mask, EVL),
+ ShAmt, Mask, EVL);
}
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
@@ -584,12 +613,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
// Zero extend to the promoted type and do the count there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
- Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
+
// Subtract off the extra leading bits in the bigger type.
- return DAG.getNode(
- ISD::SUB, dl, NVT, Op,
- DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl,
- NVT));
+ SDValue ExtractLeadingBits = DAG.getConstant(
+ NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, NVT);
+ if (!N->isVPOpcode())
+ return DAG.getNode(ISD::SUB, dl, NVT,
+ DAG.getNode(N->getOpcode(), dl, NVT, Op),
+ ExtractLeadingBits);
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+ return DAG.getNode(ISD::VP_SUB, dl, NVT,
+ DAG.getNode(N->getOpcode(), dl, NVT, Op, Mask, EVL),
+ ExtractLeadingBits, Mask, EVL);
}
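
The SUB of ExtractLeadingBits exists because zero extension introduces exactly NVT-width minus OVT-width spurious leading zeros. The same arithmetic on plain integers, as a sketch (C++20 std::countl_zero; helper name illustrative):

    #include <bit>
    #include <cstdint>

    // ctlz of an i8 computed in an i32: the zero-extended value always has 24
    // extra leading zeros, so subtract them off. For X == 0 this still gives
    // the correct i8 answer of 8.
    unsigned ctlz8ViaU32(uint8_t X) {
      return std::countl_zero(static_cast<uint32_t>(X)) - 24;
    }
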
SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) {
@@ -611,7 +647,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) {
// Zero extend to the promoted type and do the count or parity there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
- return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op);
+ if (!N->isVPOpcode())
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op,
+ N->getOperand(1), N->getOperand(2));
}
SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
@@ -635,15 +674,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
}
}
- if (N->getOpcode() == ISD::CTTZ) {
+ if (N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::VP_CTTZ) {
// The count is the same in the promoted type except if the original
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(),
OVT.getScalarSizeInBits());
- Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT));
+ if (N->getOpcode() == ISD::CTTZ)
+ Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT));
+ else
+ Op =
+ DAG.getNode(ISD::VP_OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT),
+ N->getOperand(1), N->getOperand(2));
}
- return DAG.getNode(N->getOpcode(), dl, NVT, Op);
+ if (!N->isVPOpcode())
+ return DAG.getNode(N->getOpcode(), dl, NVT, Op);
+ return DAG.getNode(N->getOpcode(), dl, NVT, Op, N->getOperand(1),
+ N->getOperand(2));
}
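
The TopBit trick above can be checked on plain integers: OR-ing in the bit just above the original width makes cttz of a zero input return the original bit width rather than the promoted one. A scalar sketch (C++20 std::countr_zero; helper name illustrative):

    #include <bit>
    #include <cstdint>

    // cttz of an i8 computed in an i32: bit 8 acts as a sentinel, so a zero
    // input yields 8 (the i8 width) instead of 32, while nonzero inputs are
    // unaffected because their trailing zeros all sit below bit 8.
    unsigned cttz8ViaU32(uint8_t X) {
      uint32_t Promoted = static_cast<uint32_t>(X) | (1u << 8); // TopBit.
      return std::countr_zero(Promoted);
    }
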
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
@@ -740,6 +787,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16_BF16(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_XRINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_GET_ROUNDING(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
@@ -1366,6 +1419,60 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amt);
}
+// A VP version of PromoteIntRes_FunnelShift.
+SDValue DAGTypeLegalizer::PromoteIntRes_VPFunnelShift(SDNode *N) {
+ SDValue Hi = GetPromotedInteger(N->getOperand(0));
+ SDValue Lo = GetPromotedInteger(N->getOperand(1));
+ SDValue Amt = N->getOperand(2);
+ SDValue Mask = N->getOperand(3);
+ SDValue EVL = N->getOperand(4);
+ if (getTypeAction(Amt.getValueType()) == TargetLowering::TypePromoteInteger)
+ Amt = ZExtPromotedInteger(Amt);
+ EVT AmtVT = Amt.getValueType();
+
+ SDLoc DL(N);
+ EVT OldVT = N->getOperand(0).getValueType();
+ EVT VT = Lo.getValueType();
+ unsigned Opcode = N->getOpcode();
+ bool IsFSHR = Opcode == ISD::VP_FSHR;
+ unsigned OldBits = OldVT.getScalarSizeInBits();
+ unsigned NewBits = VT.getScalarSizeInBits();
+
+ // Amount has to be interpreted modulo the old bit width.
+ Amt = DAG.getNode(ISD::VP_UREM, DL, AmtVT, Amt,
+ DAG.getConstant(OldBits, DL, AmtVT), Mask, EVL);
+
+ // If the promoted type is twice the size (or more), then we use the
+ // traditional funnel 'double' shift codegen. This isn't necessary if the
+ // shift amount is constant.
+ // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z % bw)) >> bw.
+ // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z % bw)).
+ if (NewBits >= (2 * OldBits) && !isa<ConstantSDNode>(Amt) &&
+ !TLI.isOperationLegalOrCustom(Opcode, VT)) {
+ SDValue HiShift = DAG.getConstant(OldBits, DL, VT);
+ Hi = DAG.getNode(ISD::VP_SHL, DL, VT, Hi, HiShift, Mask, EVL);
+    // FIXME: Replace this with VP operations.
+ Lo = DAG.getZeroExtendInReg(Lo, DL, OldVT);
+ SDValue Res = DAG.getNode(ISD::VP_OR, DL, VT, Hi, Lo, Mask, EVL);
+ Res = DAG.getNode(IsFSHR ? ISD::VP_LSHR : ISD::VP_SHL, DL, VT, Res, Amt,
+ Mask, EVL);
+ if (!IsFSHR)
+ Res = DAG.getNode(ISD::VP_LSHR, DL, VT, Res, HiShift, Mask, EVL);
+ return Res;
+ }
+
+ // Shift Lo up to occupy the upper bits of the promoted type.
+ SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, AmtVT);
+ Lo = DAG.getNode(ISD::VP_SHL, DL, VT, Lo, ShiftOffset, Mask, EVL);
+
+ // Increase Amount to shift the result into the lower bits of the promoted
+ // type.
+ if (IsFSHR)
+ Amt = DAG.getNode(ISD::VP_ADD, DL, AmtVT, Amt, ShiftOffset, Mask, EVL);
+
+ return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amt, Mask, EVL);
+}
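
The double-shift branch above follows the commented identities. A scalar model of the fshl case, assuming the promoted type is exactly twice the original width (helper name and types invented for illustration, not from the patch):

    #include <cstdint>

    // fshl on i8 done in an i16: concatenate (aext(Hi) << 8) | zext(Lo), shift
    // left by the amount modulo 8, then take the top byte back out.
    uint8_t fshl8ViaU16(uint8_t Hi, uint8_t Lo, unsigned Amt) {
      Amt %= 8; // Amount interpreted modulo the old bit width.
      uint16_t Wide = static_cast<uint16_t>((Hi << 8) | Lo);
      return static_cast<uint8_t>(static_cast<uint16_t>(Wide << Amt) >> 8);
    }
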
+
SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Res;
@@ -1638,8 +1745,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
/// result types of the node are known to be legal, but other operands of the
/// node may need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
- LLVM_DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG));
SDValue Res = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n");
@@ -1721,8 +1827,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::FRAMEADDR:
case ISD::RETURNADDR: Res = PromoteIntOp_FRAMERETURNADDR(N); break;
- case ISD::PREFETCH: Res = PromoteIntOp_PREFETCH(N, OpNo); break;
-
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
@@ -1859,9 +1963,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) {
- SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Op1 = GetPromotedInteger(N->getOperand(1));
return DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(),
- N->getChain(), N->getBasePtr(), Op2, N->getMemOperand());
+ N->getChain(), Op1, N->getBasePtr(), N->getMemOperand());
}
SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
@@ -2236,18 +2340,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_FRAMERETURNADDR(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N, Op), 0);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) {
- assert(OpNo > 1 && "Don't know how to promote this operand!");
- // Promote the rw, locality, and cache type arguments to a supported integer
- // width.
- SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
- SDValue Op3 = ZExtPromotedInteger(N->getOperand(3));
- SDValue Op4 = ZExtPromotedInteger(N->getOperand(4));
- return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
- Op2, Op3, Op4),
- 0);
-}
-
SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) {
bool IsStrict = N->isStrictFPOpcode();
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
@@ -2466,8 +2558,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) {
/// have invalid operands or may have other results that need promotion, we just
/// know that (at least) one result needs expansion.
void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
- LLVM_DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG));
SDValue Lo, Hi;
Lo = Hi = SDValue();
@@ -2512,9 +2603,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
case ISD::GET_ROUNDING:ExpandIntRes_GET_ROUNDING(N, Lo, Hi); break;
case ISD::STRICT_FP_TO_SINT:
- case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
+ case ISD::FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
- case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
+ case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_XINT(N, Lo, Hi); break;
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT: ExpandIntRes_FP_TO_XINT_SAT(N, Lo, Hi); break;
case ISD::STRICT_LROUND:
@@ -3591,43 +3682,24 @@ void DAGTypeLegalizer::ExpandIntRes_GET_ROUNDING(SDNode *N, SDValue &Lo,
ReplaceValueWith(SDValue(N, 1), Chain);
}
-void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
-
- bool IsStrict = N->isStrictFPOpcode();
- SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
- SDValue Op = N->getOperand(IsStrict ? 1 : 0);
- if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
- Op = GetPromotedFloat(Op);
-
- if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftPromoteHalf) {
- EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
- Op = GetSoftPromotedHalf(Op);
- Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
- Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
- SplitInteger(Op, Lo, Hi);
- return;
+// Helper for producing an FP_EXTEND/STRICT_FP_EXTEND of Op.
+static SDValue fpExtendHelper(SDValue Op, SDValue &Chain, bool IsStrict, EVT VT,
+ SDLoc DL, SelectionDAG &DAG) {
+ if (IsStrict) {
+ Op = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other}, {Chain, Op});
+ Chain = Op.getValue(1);
+ return Op;
}
-
- RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
- TargetLowering::MakeLibCallOptions CallOptions;
- CallOptions.setSExt(true);
- std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op,
- CallOptions, dl, Chain);
- SplitInteger(Tmp.first, Lo, Hi);
-
- if (IsStrict)
- ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return DAG.getNode(ISD::FP_EXTEND, DL, VT, Op);
}
-void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_XINT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
+ bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
+ N->getOpcode() == ISD::STRICT_FP_TO_SINT;
bool IsStrict = N->isStrictFPOpcode();
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
@@ -3635,17 +3707,26 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
Op = GetPromotedFloat(Op);
if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftPromoteHalf) {
- EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
+ EVT OFPVT = Op.getValueType();
+ EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), OFPVT);
Op = GetSoftPromotedHalf(Op);
- Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
- Op = DAG.getNode(ISD::FP_TO_UINT, dl, VT, Op);
+ Op = DAG.getNode(OFPVT == MVT::f16 ? ISD::FP16_TO_FP : ISD::BF16_TO_FP, dl,
+ NFPVT, Op);
+ Op = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, VT, Op);
SplitInteger(Op, Lo, Hi);
return;
}
- RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
+ if (Op.getValueType() == MVT::bf16) {
+ // Extend to f32 as there is no bf16 libcall.
+ Op = fpExtendHelper(Op, Chain, IsStrict, MVT::f32, dl, DAG);
+ }
+
+ RTLIB::Libcall LC = IsSigned ? RTLIB::getFPTOSINT(Op.getValueType(), VT)
+ : RTLIB::getFPTOUINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-xint conversion!");
TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op,
CallOptions, dl, Chain);
SplitInteger(Tmp.first, Lo, Hi);
@@ -3673,14 +3754,9 @@ void DAGTypeLegalizer::ExpandIntRes_XROUND_XRINT(SDNode *N, SDValue &Lo,
EVT VT = Op.getValueType();
if (VT == MVT::f16) {
- VT = MVT::f32;
// Extend to f32.
- if (IsStrict) {
- Op = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, { VT, MVT::Other }, {Chain, Op});
- Chain = Op.getValue(1);
- } else {
- Op = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op);
- }
+ VT = MVT::f32;
+ Op = fpExtendHelper(Op, Chain, IsStrict, VT, dl, DAG);
}
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
@@ -3754,20 +3830,7 @@ void DAGTypeLegalizer::ExpandIntRes_XROUND_XRINT(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
SDValue &Lo, SDValue &Hi) {
- if (N->isAtomic()) {
- // It's typical to have larger CAS than atomic load instructions.
- SDLoc dl(N);
- EVT VT = N->getMemoryVT();
- SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
- SDValue Zero = DAG.getConstant(0, dl, VT);
- SDValue Swap = DAG.getAtomicCmpSwap(
- ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl,
- VT, VTs, N->getOperand(0),
- N->getOperand(1), Zero, Zero, N->getMemOperand());
- ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
- ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
- return;
- }
+  assert(!N->isAtomic() && "Should have been an ATOMIC_LOAD?");
if (ISD::isNormalLoad(N)) {
ExpandRes_NormalLoad(N, Lo, Hi);
@@ -3822,7 +3885,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(IncrementSize), dl);
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
N->getOriginalAlign(), MMOFlags, AAInfo);
@@ -3846,7 +3909,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
N->getOriginalAlign(), MMOFlags, AAInfo);
// Increment the pointer to the other half.
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(IncrementSize), dl);
// Load the rest of the low bits.
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
@@ -4760,7 +4823,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
// Also pass the address of the overflow check.
Entry.Node = Temp;
- Entry.Ty = PtrTy->getPointerTo();
+ Entry.Ty = PointerType::getUnqual(PtrTy->getContext());
Entry.IsSExt = true;
Entry.IsZExt = false;
Args.push_back(Entry);
@@ -4988,8 +5051,7 @@ void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo,
/// result types of the node are known to be legal, but other operands of the
/// node may need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
- LLVM_DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG));
SDValue Res = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
@@ -5014,11 +5076,11 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break;
case ISD::STRICT_SINT_TO_FP:
- case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
+ case ISD::SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::UINT_TO_FP: Res = ExpandIntOp_XINT_TO_FP(N); break;
case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
- case ISD::STRICT_UINT_TO_FP:
- case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break;
case ISD::SHL:
case ISD::SRA:
@@ -5067,16 +5129,11 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
GetExpandedInteger(NewRHS, RHSLo, RHSHi);
if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
- if (RHSLo == RHSHi) {
- if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {
- if (RHSCST->isAllOnes()) {
- // Equality comparison to -1.
- NewLHS = DAG.getNode(ISD::AND, dl,
- LHSLo.getValueType(), LHSLo, LHSHi);
- NewRHS = RHSLo;
- return;
- }
- }
+ if (RHSLo == RHSHi && isAllOnesConstant(RHSLo)) {
+ // Equality comparison to -1.
+ NewLHS = DAG.getNode(ISD::AND, dl, LHSLo.getValueType(), LHSLo, LHSHi);
+ NewRHS = RHSLo;
+ return;
}
NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo);
@@ -5303,14 +5360,17 @@ SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N, Lo), 0);
}
-SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
+SDValue DAGTypeLegalizer::ExpandIntOp_XINT_TO_FP(SDNode *N) {
bool IsStrict = N->isStrictFPOpcode();
+ bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP ||
+ N->getOpcode() == ISD::STRICT_SINT_TO_FP;
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
EVT DstVT = N->getValueType(0);
- RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
+ RTLIB::Libcall LC = IsSigned ? RTLIB::getSINTTOFP(Op.getValueType(), DstVT)
+ : RTLIB::getUINTTOFP(Op.getValueType(), DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
- "Don't know how to expand this SINT_TO_FP!");
+ "Don't know how to expand this XINT_TO_FP!");
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
std::pair<SDValue, SDValue> Tmp =
@@ -5325,16 +5385,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
}
SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
- if (N->isAtomic()) {
- // It's typical to have larger CAS than atomic store instructions.
- SDLoc dl(N);
- SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
- N->getMemoryVT(),
- N->getOperand(0), N->getOperand(2),
- N->getOperand(1),
- N->getMemOperand());
- return Swap.getValue(1);
- }
+  assert(!N->isAtomic() && "Should have been an ATOMIC_STORE?");
+
if (ISD::isNormalStore(N))
return ExpandOp_NormalStore(N, OpNo);
@@ -5372,7 +5424,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
NEVT, N->getOriginalAlign(), MMOFlags, AAInfo);
@@ -5407,7 +5459,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
N->getOriginalAlign(), MMOFlags, AAInfo);
// Increment the pointer to the other half.
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
// Store the lowest ExcessBits bits in the second half.
Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
@@ -5423,34 +5475,12 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), InL);
}
-SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
- bool IsStrict = N->isStrictFPOpcode();
- SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
- SDValue Op = N->getOperand(IsStrict ? 1 : 0);
- EVT DstVT = N->getValueType(0);
- RTLIB::Libcall LC = RTLIB::getUINTTOFP(Op.getValueType(), DstVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL &&
- "Don't know how to expand this UINT_TO_FP!");
- TargetLowering::MakeLibCallOptions CallOptions;
- CallOptions.setSExt(true);
- std::pair<SDValue, SDValue> Tmp =
- TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N), Chain);
-
- if (!IsStrict)
- return Tmp.first;
-
- ReplaceValueWith(SDValue(N, 1), Tmp.second);
- ReplaceValueWith(SDValue(N, 0), Tmp.first);
- return SDValue();
-}
-
SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
SDLoc dl(N);
- SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
- cast<AtomicSDNode>(N)->getMemoryVT(),
- N->getOperand(0),
- N->getOperand(1), N->getOperand(2),
- cast<AtomicSDNode>(N)->getMemOperand());
+ SDValue Swap =
+ DAG.getAtomic(ISD::ATOMIC_SWAP, dl, cast<AtomicSDNode>(N)->getMemoryVT(),
+ N->getOperand(0), N->getOperand(2), N->getOperand(1),
+ cast<AtomicSDNode>(N)->getMemOperand());
return Swap.getValue(1);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 328939e44dcb..8a93433c5e04 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -235,7 +235,7 @@ bool DAGTypeLegalizer::run() {
assert(N->getNodeId() == ReadyToProcess &&
"Node should be ready if on worklist!");
- LLVM_DEBUG(dbgs() << "Legalizing node: "; N->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nLegalizing node: "; N->dump(&DAG));
if (IgnoreNodeResults(N)) {
LLVM_DEBUG(dbgs() << "Ignoring node results\n");
goto ScanOperands;
@@ -390,8 +390,7 @@ ScanOperands:
}
if (i == NumOperands) {
- LLVM_DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG));
}
}
NodeDone:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index db8f61eee606..9d5931b44ac6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -326,6 +326,7 @@ private:
SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
SDValue PromoteIntRes_FP_TO_XINT_SAT(SDNode *N);
SDValue PromoteIntRes_FP_TO_FP16_BF16(SDNode *N);
+ SDValue PromoteIntRes_XRINT(SDNode *N);
SDValue PromoteIntRes_FREEZE(SDNode *N);
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
@@ -362,6 +363,7 @@ private:
SDValue PromoteIntRes_ABS(SDNode *N);
SDValue PromoteIntRes_Rotate(SDNode *N);
SDValue PromoteIntRes_FunnelShift(SDNode *N);
+ SDValue PromoteIntRes_VPFunnelShift(SDNode *N);
SDValue PromoteIntRes_IS_FPCLASS(SDNode *N);
// Integer Operand Promotion.
@@ -400,7 +402,6 @@ private:
SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_ADDSUBO_CARRY(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N);
- SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_FIX(SDNode *N);
SDValue PromoteIntOp_ExpOp(SDNode *N);
SDValue PromoteIntOp_VECREDUCE(SDNode *N);
@@ -442,8 +443,7 @@ private:
void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_GET_ROUNDING (SDNode *N, SDValue &Lo, SDValue &Hi);
- void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
- void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_XINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_XINT_SAT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_XROUND_XRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -494,10 +494,9 @@ private:
SDValue ExpandIntOp_SETCC(SDNode *N);
SDValue ExpandIntOp_SETCCCARRY(SDNode *N);
SDValue ExpandIntOp_Shift(SDNode *N);
- SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue ExpandIntOp_TRUNCATE(SDNode *N);
- SDValue ExpandIntOp_UINT_TO_FP(SDNode *N);
+ SDValue ExpandIntOp_XINT_TO_FP(SDNode *N);
SDValue ExpandIntOp_RETURNADDR(SDNode *N);
SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N);
SDValue ExpandIntOp_SPLAT_VECTOR(SDNode *N);
@@ -552,6 +551,7 @@ private:
SDValue SoftenFloatRes_FDIV(SDNode *N);
SDValue SoftenFloatRes_FEXP(SDNode *N);
SDValue SoftenFloatRes_FEXP2(SDNode *N);
+ SDValue SoftenFloatRes_FEXP10(SDNode *N);
SDValue SoftenFloatRes_FFLOOR(SDNode *N);
SDValue SoftenFloatRes_FLOG(SDNode *N);
SDValue SoftenFloatRes_FLOG2(SDNode *N);
@@ -633,6 +633,7 @@ private:
void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP10 (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -711,7 +712,7 @@ private:
SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo);
- SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_UnaryOp(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo);
@@ -737,6 +738,7 @@ private:
SDValue SoftPromoteHalfRes_FCOPYSIGN(SDNode *N);
SDValue SoftPromoteHalfRes_FMAD(SDNode *N);
SDValue SoftPromoteHalfRes_ExpOp(SDNode *N);
+ SDValue SoftPromoteHalfRes_FFREXP(SDNode *N);
SDValue SoftPromoteHalfRes_FP_ROUND(SDNode *N);
SDValue SoftPromoteHalfRes_LOAD(SDNode *N);
SDValue SoftPromoteHalfRes_SELECT(SDNode *N);
@@ -888,6 +890,7 @@ private:
void SplitVecRes_VECTOR_INTERLEAVE(SDNode *N);
void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VP_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi);
// Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
@@ -986,6 +989,7 @@ private:
SDValue WidenVecRes_Convert(SDNode *N);
SDValue WidenVecRes_Convert_StrictFP(SDNode *N);
SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N);
+ SDValue WidenVecRes_XRINT(SDNode *N);
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
SDValue WidenVecRes_IS_FPCLASS(SDNode *N);
SDValue WidenVecRes_ExpOp(SDNode *N);
@@ -1000,6 +1004,7 @@ private:
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue WidenVecOp_INSERT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue WidenVecOp_EXTEND_VECTOR_INREG(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
SDValue WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo);
SDValue WidenVecOp_VP_STRIDED_STORE(SDNode *N, unsigned OpNo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 296242c00401..a55364ea2c4e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -176,7 +176,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Increment the pointer to the other half.
unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
StackPtr =
- DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(IncrementSize), dl);
+ DAG.getMemBasePlusOffset(StackPtr, TypeSize::getFixed(IncrementSize), dl);
// Load the second half from the stack slot.
Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
@@ -265,7 +265,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(IncrementSize), dl);
Hi = DAG.getLoad(
NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize),
LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), AAInfo);
@@ -479,7 +479,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
St->getOriginalAlign(), St->getMemOperand()->getFlags(),
AAInfo);
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
Hi = DAG.getStore(
Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize),
St->getOriginalAlign(), St->getMemOperand()->getFlags(), AAInfo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 3862fd241897..1fbd6322f9ed 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -166,6 +166,21 @@ class VectorLegalizer {
/// truncated back to the original type.
void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ /// Implements vector reduce operation promotion.
+ ///
+ /// All vector operands are promoted to a vector type with larger element
+ /// type, and the start value is promoted to a larger scalar type. Then the
+ /// result is truncated back to the original scalar type.
+ void PromoteReduction(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+ /// Implements vector setcc operation promotion.
+ ///
+ /// All vector operands are promoted to a vector type with larger element
+ /// type.
+ void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+ void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
public:
VectorLegalizer(SelectionDAG& dag) :
DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
@@ -385,9 +400,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FLOG10:
case ISD::FEXP:
case ISD::FEXP2:
+ case ISD::FEXP10:
case ISD::FCEIL:
case ISD::FTRUNC:
case ISD::FRINT:
+ case ISD::LRINT:
+ case ISD::LLRINT:
case ISD::FNEARBYINT:
case ISD::FROUND:
case ISD::FROUNDEVEN:
@@ -551,6 +569,116 @@ bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
return true;
}
+void VectorLegalizer::PromoteReduction(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ MVT VecVT = Node->getOperand(1).getSimpleValueType();
+ MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
+ MVT ScalarVT = Node->getSimpleValueType(0);
+ MVT NewScalarVT = NewVecVT.getVectorElementType();
+
+ SDLoc DL(Node);
+ SmallVector<SDValue, 4> Operands(Node->getNumOperands());
+
+  // Promote the initial value.
+ if (Node->getOperand(0).getValueType().isFloatingPoint())
+ Operands[0] =
+ DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(0));
+ else
+ Operands[0] =
+ DAG.getNode(ISD::ANY_EXTEND, DL, NewScalarVT, Node->getOperand(0));
+
+ for (unsigned j = 1; j != Node->getNumOperands(); ++j)
+ if (Node->getOperand(j).getValueType().isVector() &&
+ !(ISD::isVPOpcode(Node->getOpcode()) &&
+ ISD::getVPMaskIdx(Node->getOpcode()) == j)) // Skip mask operand.
+      // Promote the vector operand.
+ if (Node->getOperand(j).getValueType().isFloatingPoint())
+ Operands[j] =
+ DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j));
+ else
+ Operands[j] =
+ DAG.getNode(ISD::ANY_EXTEND, DL, NewVecVT, Node->getOperand(j));
+ else
+      Operands[j] = Node->getOperand(j); // Keep the mask and VL operands.
+
+ SDValue Res = DAG.getNode(Node->getOpcode(), DL, NewScalarVT, Operands,
+ Node->getFlags());
+
+ if (ScalarVT.isFloatingPoint())
+ Res = DAG.getNode(ISD::FP_ROUND, DL, ScalarVT, Res,
+ DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
+ else
+ Res = DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, Res);
+
+ Results.push_back(Res);
+}
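
A scalar model of what PromoteReduction computes for an integer VP reduction, assuming i8 elements promoted to i32 (names are invented for illustration; mask handling is omitted for brevity):

    #include <cstdint>
    #include <vector>

    // VP_REDUCE_ADD on i8 modelled via i32: any-extend the start value and the
    // elements, accumulate in the wider type, truncate the scalar result back.
    // The final truncation preserves the original wrap-around semantics.
    uint8_t vpReduceAddModel(uint8_t Start, const std::vector<uint8_t> &Vec,
                             size_t EVL) {
      uint32_t Acc = Start;               // ANY_EXTEND of the start value.
      for (size_t I = 0; I < EVL && I < Vec.size(); ++I)
        Acc += Vec[I];                    // Vector operand promoted the same way.
      return static_cast<uint8_t>(Acc);   // TRUNCATE back to the result type.
    }
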
+
+void VectorLegalizer::PromoteSETCC(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ MVT VecVT = Node->getOperand(0).getSimpleValueType();
+ MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
+
+ unsigned ExtOp = VecVT.isFloatingPoint() ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
+
+ SDLoc DL(Node);
+ SmallVector<SDValue, 5> Operands(Node->getNumOperands());
+
+ Operands[0] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(0));
+ Operands[1] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(1));
+ Operands[2] = Node->getOperand(2);
+
+ if (Node->getOpcode() == ISD::VP_SETCC) {
+ Operands[3] = Node->getOperand(3); // mask
+ Operands[4] = Node->getOperand(4); // evl
+ }
+
+ SDValue Res = DAG.getNode(Node->getOpcode(), DL, Node->getSimpleValueType(0),
+ Operands, Node->getFlags());
+
+ Results.push_back(Res);
+}
+
+void VectorLegalizer::PromoteSTRICT(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ MVT VecVT = Node->getOperand(1).getSimpleValueType();
+ MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
+
+ assert(VecVT.isFloatingPoint());
+
+ SDLoc DL(Node);
+ SmallVector<SDValue, 5> Operands(Node->getNumOperands());
+ SmallVector<SDValue, 2> Chains;
+
+ for (unsigned j = 1; j != Node->getNumOperands(); ++j)
+ if (Node->getOperand(j).getValueType().isVector() &&
+ !(ISD::isVPOpcode(Node->getOpcode()) &&
+ ISD::getVPMaskIdx(Node->getOpcode()) == j)) // Skip mask operand.
+ {
+      // Promote the vector operand.
+ SDValue Ext =
+ DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {NewVecVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(j)});
+ Operands[j] = Ext.getValue(0);
+ Chains.push_back(Ext.getValue(1));
+ } else
+      Operands[j] = Node->getOperand(j); // Keep non-vector operands as-is.
+
+ SDVTList VTs = DAG.getVTList(NewVecVT, Node->getValueType(1));
+
+ Operands[0] = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+
+ SDValue Res =
+ DAG.getNode(Node->getOpcode(), DL, VTs, Operands, Node->getFlags());
+
+ SDValue Round =
+ DAG.getNode(ISD::STRICT_FP_ROUND, DL, {VecVT, MVT::Other},
+ {Res.getValue(1), Res.getValue(0),
+ DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
+
+ Results.push_back(Round.getValue(0));
+ Results.push_back(Round.getValue(1));
+}
+
void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
// For a few operations there is a specific concept for promotion based on
// the operand's type.
@@ -569,6 +697,36 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
// Promote the operation by extending the operand.
PromoteFP_TO_INT(Node, Results);
return;
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ case ISD::VP_REDUCE_FADD:
+ case ISD::VP_REDUCE_FMUL:
+ case ISD::VP_REDUCE_FMAX:
+ case ISD::VP_REDUCE_FMIN:
+ case ISD::VP_REDUCE_SEQ_FADD:
+ // Promote the operation by extending the operand.
+ PromoteReduction(Node, Results);
+ return;
+ case ISD::VP_SETCC:
+ case ISD::SETCC:
+ // Promote the operation by extending the operand.
+ PromoteSETCC(Node, Results);
+ return;
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ PromoteSTRICT(Node, Results);
+ return;
case ISD::FP_ROUND:
case ISD::FP_EXTEND:
// These operations are used to do promotion so they can't be promoted
@@ -589,7 +747,10 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
SmallVector<SDValue, 4> Operands(Node->getNumOperands());
for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
- if (Node->getOperand(j).getValueType().isVector())
+ // Do not promote the mask operand of a VP OP.
+ bool SkipPromote = ISD::isVPOpcode(Node->getOpcode()) &&
+ ISD::getVPMaskIdx(Node->getOpcode()) == j;
+ if (Node->getOperand(j).getValueType().isVector() && !SkipPromote)
if (Node->getOperand(j)
.getValueType()
.getVectorElementType()
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 8c117c1c74dc..66461b26468f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -38,8 +38,8 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
- LLVM_DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Scalarize node result " << ResNo << ": ";
+ N->dump(&DAG));
SDValue R = SDValue();
switch (N->getOpcode()) {
@@ -88,6 +88,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FCOS:
case ISD::FEXP:
case ISD::FEXP2:
+ case ISD::FEXP10:
case ISD::FFLOOR:
case ISD::FLOG:
case ISD::FLOG10:
@@ -100,6 +101,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FRINT:
+ case ISD::LRINT:
+ case ISD::LLRINT:
case ISD::FROUND:
case ISD::FROUNDEVEN:
case ISD::FSIN:
@@ -656,8 +659,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_IS_FPCLASS(SDNode *N) {
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
- LLVM_DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
+ N->dump(&DAG));
SDValue Res = SDValue();
switch (N->getOpcode()) {
@@ -680,6 +683,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
+ case ISD::LRINT:
+ case ISD::LLRINT:
Res = ScalarizeVecOp_UnaryOp(N);
break;
case ISD::STRICT_SINT_TO_FP:
@@ -965,7 +970,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N) {
/// invalid operands or may have other results that need legalization, we just
/// know that (at least) one result needs vector splitting.
void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
- LLVM_DEBUG(dbgs() << "Split node result: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Split node result: "; N->dump(&DAG));
SDValue Lo, Hi;
// See if the target wants to custom expand this node.
@@ -1075,6 +1080,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FCOS:
case ISD::FEXP:
case ISD::FEXP2:
+ case ISD::FEXP10:
case ISD::FFLOOR:
case ISD::VP_FFLOOR:
case ISD::FLOG:
@@ -1095,6 +1101,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_FP_TO_UINT:
case ISD::FRINT:
case ISD::VP_FRINT:
+ case ISD::LRINT:
+ case ISD::LLRINT:
case ISD::FROUND:
case ISD::VP_FROUND:
case ISD::FROUNDEVEN:
@@ -1201,6 +1209,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UDIVFIXSAT:
SplitVecRes_FIX(N, Lo, Hi);
break;
+ case ISD::EXPERIMENTAL_VP_REVERSE:
+ SplitVecRes_VP_REVERSE(N, Lo, Hi);
+ break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1228,7 +1239,7 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
} else {
MPI = N->getPointerInfo().getWithOffset(IncrementSize);
// Increment the pointer to the other half.
- Ptr = DAG.getObjectPtrOffset(DL, Ptr, TypeSize::Fixed(IncrementSize));
+ Ptr = DAG.getObjectPtrOffset(DL, Ptr, TypeSize::getFixed(IncrementSize));
}
}
@@ -2849,6 +2860,56 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo,
DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
}
+void DAGTypeLegalizer::SplitVecRes_VP_REVERSE(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDValue Val = N->getOperand(0);
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+ SDLoc DL(N);
+
+  // Fall back to a VP_STRIDED_STORE to the stack followed by a VP_LOAD.
+ Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
+
+ EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ VT.getVectorElementCount());
+ SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
+ EVT PtrVT = StackPtr.getValueType();
+ auto &MF = DAG.getMachineFunction();
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+
+ MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOStore, MemoryLocation::UnknownSize,
+ Alignment);
+ MachineMemOperand *LoadMMO = DAG.getMachineFunction().getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize,
+ Alignment);
+
+ unsigned EltWidth = VT.getScalarSizeInBits() / 8;
+ SDValue NumElemMinus1 =
+ DAG.getNode(ISD::SUB, DL, PtrVT, DAG.getZExtOrTrunc(EVL, DL, PtrVT),
+ DAG.getConstant(1, DL, PtrVT));
+ SDValue StartOffset = DAG.getNode(ISD::MUL, DL, PtrVT, NumElemMinus1,
+ DAG.getConstant(EltWidth, DL, PtrVT));
+ SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, StartOffset);
+ SDValue Stride = DAG.getConstant(-(int64_t)EltWidth, DL, PtrVT);
+
+ SDValue TrueMask = DAG.getBoolConstant(true, DL, Mask.getValueType(), VT);
+ SDValue Store = DAG.getStridedStoreVP(DAG.getEntryNode(), DL, Val, StorePtr,
+ DAG.getUNDEF(PtrVT), Stride, TrueMask,
+ EVL, MemVT, StoreMMO, ISD::UNINDEXED);
+
+ SDValue Load = DAG.getLoadVP(VT, DL, Store, StackPtr, Mask, EVL, LoadMMO);
+
+ auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Load,
+ DAG.getVectorIdxConstant(0, DL));
+ Hi =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Load,
+ DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
+}
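
The negative-stride trick above can be hard to visualize, so here is a scalar model with names invented for illustration: element i of the input is stored at slot EVL-1-i, so a forward load then reads the first EVL elements reversed.

    #include <cstddef>
    #include <vector>

    // Model of the VP_STRIDED_STORE (stride = -EltWidth, starting at slot
    // EVL-1) followed by a forward VP_LOAD from the start of the buffer.
    std::vector<int> vpReverseModel(const std::vector<int> &Val, size_t EVL) {
      std::vector<int> Slot(Val.size(), 0);
      for (size_t I = 0; I < EVL && I < Val.size(); ++I)
        Slot[EVL - 1 - I] = Val[I]; // Store runs backwards from slot EVL-1.
      return Slot; // The load then reads the reversed prefix forwards.
    }
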
+
void DAGTypeLegalizer::SplitVecRes_VECTOR_DEINTERLEAVE(SDNode *N) {
SDValue Op0Lo, Op0Hi, Op1Lo, Op1Hi;
@@ -2889,7 +2950,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_INTERLEAVE(SDNode *N) {
/// the node are known to be legal, but other operands of the node may need
/// legalization as well as the specified one.
bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
- LLVM_DEBUG(dbgs() << "Split node operand: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Split node operand: "; N->dump(&DAG));
SDValue Res = SDValue();
// See if the target wants to custom split this node.
@@ -2972,6 +3033,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::FTRUNC:
+ case ISD::LRINT:
+ case ISD::LLRINT:
Res = SplitVecOp_UnaryOp(N);
break;
case ISD::FLDEXP:
@@ -3973,8 +4036,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) {
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
- LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG));
// See if the target wants to custom widen this node.
if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
@@ -4195,11 +4257,17 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_FP_TO_XINT_SAT(N);
break;
+ case ISD::LRINT:
+ case ISD::LLRINT:
+ Res = WidenVecRes_XRINT(N);
+ break;
+
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
case ISD::FEXP:
case ISD::FEXP2:
+ case ISD::FEXP10:
case ISD::FFLOOR:
case ISD::FLOG:
case ISD::FLOG10:
@@ -4779,6 +4847,27 @@ SDValue DAGTypeLegalizer::WidenVecRes_FP_TO_XINT_SAT(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, N->getOperand(1));
}
+SDValue DAGTypeLegalizer::WidenVecRes_XRINT(SDNode *N) {
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ ElementCount WidenNumElts = WidenVT.getVectorElementCount();
+
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // Also widen the input.
+ if (getTypeAction(SrcVT) == TargetLowering::TypeWidenVector) {
+ Src = GetWidenedVector(Src);
+ SrcVT = Src.getValueType();
+ }
+
+  // If the input and output are not widened to the same size, give up.
+ if (WidenNumElts != SrcVT.getVectorElementCount())
+ return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue());
+
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, Src);
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
SDValue InOp = N->getOperand(1);
SDLoc DL(N);
@@ -5919,8 +6008,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) {
// Widen Vector Operand
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
- LLVM_DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG));
SDValue Res = SDValue();
// See if the target wants to custom widen this node.
@@ -5946,6 +6034,11 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
Res = WidenVecOp_VP_STRIDED_STORE(N, OpNo);
break;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Res = WidenVecOp_EXTEND_VECTOR_INREG(N);
+ break;
case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break;
case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
@@ -5955,7 +6048,11 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break;
case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break;
case ISD::FLDEXP:
- case ISD::FCOPYSIGN: Res = WidenVecOp_UnrollVectorOp(N); break;
+ case ISD::FCOPYSIGN:
+ case ISD::LRINT:
+ case ISD::LLRINT:
+ Res = WidenVecOp_UnrollVectorOp(N);
+ break;
case ISD::IS_FPCLASS: Res = WidenVecOp_IS_FPCLASS(N); break;
case ISD::ANY_EXTEND:
@@ -6317,8 +6414,30 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector)
SubVec = GetWidenedVector(SubVec);
- if (SubVec.getValueType().knownBitsLE(VT) && InVec.isUndef() &&
- N->getConstantOperandVal(2) == 0)
+ EVT SubVT = SubVec.getValueType();
+
+ // Whether or not all the elements of the widened SubVec will be inserted into
+ // valid indices of VT.
+ bool IndicesValid = false;
+ // If we statically know that VT can fit SubVT, the indices are valid.
+ if (VT.knownBitsGE(SubVT))
+ IndicesValid = true;
+ else if (VT.isScalableVector() && SubVT.isFixedLengthVector()) {
+ // Otherwise, if we're inserting a fixed vector into a scalable vector and
+ // we know the minimum vscale we can work out if it's valid ourselves.
+ Attribute Attr = DAG.getMachineFunction().getFunction().getFnAttribute(
+ Attribute::VScaleRange);
+ if (Attr.isValid()) {
+ unsigned VScaleMin = Attr.getVScaleRangeMin();
+ if (VT.getSizeInBits().getKnownMinValue() * VScaleMin >=
+ SubVT.getFixedSizeInBits())
+ IndicesValid = true;
+ }
+ }
+
+ // We need to make sure that the indices are still valid, otherwise we might
+ // widen what was previously well-defined to something undefined.
+ if (IndicesValid && InVec.isUndef() && N->getConstantOperandVal(2) == 0)
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, InVec, SubVec,
N->getOperand(2));
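
To make the IndicesValid arithmetic concrete, an example with made-up numbers: inserting a fixed <4 x i32> (128 bits) into <vscale x 2 x i32> (64-bit known minimum size) is provably in bounds once the function carries a vscale_range(2, ...) attribute.

    #include <cassert>

    int main() {
      unsigned KnownMinBits = 64; // <vscale x 2 x i32> minimum size in bits.
      unsigned VScaleMin = 2;     // From a vscale_range(2, ...) attribute.
      unsigned SubVTBits = 128;   // Fixed-length <4 x i32> being inserted.
      // Mirrors: VT.getSizeInBits().getKnownMinValue() * VScaleMin >= SubVTBits.
      assert(KnownMinBits * VScaleMin >= SubVTBits);
      return 0;
    }
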
@@ -6338,6 +6457,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
N->getValueType(0), InOp, N->getOperand(1));
}
+SDValue DAGTypeLegalizer::WidenVecOp_EXTEND_VECTOR_INREG(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), InOp);
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
// We have to widen the value, but we want only to store the original
// vector type.
@@ -6458,7 +6582,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VP_STRIDED_STORE(SDNode *N,
}
SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
- assert((OpNo == 1 || OpNo == 3) &&
+ assert((OpNo == 1 || OpNo == 4) &&
"Can widen only data or mask operand of mstore");
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
@@ -7083,7 +7207,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr =
- DAG.getObjectPtrOffset(dl, BasePtr, TypeSize::Fixed(Offset));
+ DAG.getObjectPtrOffset(dl, BasePtr, TypeSize::getFixed(Offset));
Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
LD->getOriginalAlign(), MMOFlags, AAInfo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 5b01743d23e0..ab4c33c9e976 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -498,12 +498,12 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags =
cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
- unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ const InlineAsm::Flag F(Flags);
+ unsigned NumVals = F.getNumOperandRegisters();
++i; // Skip the ID value.
- if (InlineAsm::isRegDefKind(Flags) ||
- InlineAsm::isRegDefEarlyClobberKind(Flags) ||
- InlineAsm::isClobberKind(Flags)) {
+ if (F.isRegDefKind() || F.isRegDefEarlyClobberKind() ||
+ F.isClobberKind()) {
// Check for def of register or earlyclobber register.
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
@@ -808,12 +808,12 @@ ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-llvm::ScheduleDAGSDNodes *
-llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::ScheduleDAGSDNodes *llvm::createFastDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOptLevel) {
return new ScheduleDAGFast(*IS->MF);
}
-llvm::ScheduleDAGSDNodes *
-llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::ScheduleDAGSDNodes *llvm::createDAGLinearizer(SelectionDAGISel *IS,
+ CodeGenOptLevel) {
return new ScheduleDAGLinearize(*IS->MF);
}
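
Both hunks above replace free functions over a raw inline-asm flag word with the InlineAsm::Flag wrapper, which decodes the same packed integer behind named accessors. A toy model of that packing (the field widths and kind value here are assumptions for illustration; the real encoding lives in llvm/IR/InlineAsm.h):

    #include <cassert>
    #include <cstdint>

    // Illustrative model only: operand kind in the low bits, register
    // count packed above it, decoded through named accessors instead of
    // loose helper functions taking the raw word.
    class FlagModel {
      uint32_t Bits;
      static constexpr uint32_t KindMask = 0x7; // assumed 3-bit kind field
      static constexpr unsigned NumShift = 3;   // count stored above it

    public:
      explicit FlagModel(uint32_t Raw) : Bits(Raw) {}
      unsigned getKind() const { return Bits & KindMask; }
      unsigned getNumOperandRegisters() const { return Bits >> NumShift; }
      bool isRegDefKind() const { return getKind() == 2; } // assumed id
    };

    int main() {
      FlagModel F((4u << 3) | 2u); // four registers, kind 2
      assert(F.isRegDefKind());
      assert(F.getNumOperandRegisters() == 4);
    }
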
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 458f50c54824..47c137d2bcad 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -188,10 +188,9 @@ private:
public:
ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
SchedulingPriorityQueue *availqueue,
- CodeGenOpt::Level OptLevel)
- : ScheduleDAGSDNodes(mf),
- NeedLatency(needlatency), AvailableQueue(availqueue),
- Topo(SUnits, nullptr) {
+ CodeGenOptLevel OptLevel)
+ : ScheduleDAGSDNodes(mf), NeedLatency(needlatency),
+ AvailableQueue(availqueue), Topo(SUnits, nullptr) {
const TargetSubtargetInfo &STI = mf.getSubtarget();
if (DisableSchedCycles || !NeedLatency)
HazardRec = new ScheduleHazardRecognizer();
@@ -987,11 +986,6 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
return nullptr;
- // unfolding an x86 DEC64m operation results in store, dec, load which
- // can't be handled here so quit
- if (NewNodes.size() == 3)
- return nullptr;
-
assert(NewNodes.size() == 2 && "Expected a load folding node!");
N = NewNodes[1];
@@ -1377,12 +1371,12 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags =
cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
- unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ const InlineAsm::Flag F(Flags);
+ unsigned NumVals = F.getNumOperandRegisters();
++i; // Skip the ID value.
- if (InlineAsm::isRegDefKind(Flags) ||
- InlineAsm::isRegDefEarlyClobberKind(Flags) ||
- InlineAsm::isClobberKind(Flags)) {
+ if (F.isRegDefKind() || F.isRegDefEarlyClobberKind() ||
+ F.isClobberKind()) {
// Check for def of register or earlyclobber register.
for (; NumVals; --NumVals, ++i) {
Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
@@ -3150,9 +3144,8 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-ScheduleDAGSDNodes *
-llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
- CodeGenOpt::Level OptLevel) {
+ScheduleDAGSDNodes *llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOptLevel OptLevel) {
const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
const TargetInstrInfo *TII = STI.getInstrInfo();
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
@@ -3166,7 +3159,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
const TargetInstrInfo *TII = STI.getInstrInfo();
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
@@ -3180,7 +3173,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
ScheduleDAGSDNodes *
llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
const TargetInstrInfo *TII = STI.getInstrInfo();
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
@@ -3194,9 +3187,8 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
return SD;
}
-ScheduleDAGSDNodes *
-llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
- CodeGenOpt::Level OptLevel) {
+ScheduleDAGSDNodes *llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOptLevel OptLevel) {
const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
const TargetInstrInfo *TII = STI.getInstrInfo();
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 0579c1664d5c..c9e2745f00c9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -659,18 +659,19 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
if (Use->isMachineOpcode())
// Adjust the use operand index by num of defs.
OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
- int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
- if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
+ std::optional<unsigned> Latency =
+ TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
+ if (Latency > 1U && Use->getOpcode() == ISD::CopyToReg &&
!BB->succ_empty()) {
unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
if (Register::isVirtualRegister(Reg))
// This copy is a liveout value. It is likely coalesced, so reduce the
// latency so not to penalize the def.
// FIXME: need target specific adjustment here?
- Latency = Latency - 1;
+ Latency = *Latency - 1;
}
- if (Latency >= 0)
- dep.setLatency(Latency);
+ if (Latency)
+ dep.setLatency(*Latency);
}
void ScheduleDAGSDNodes::dumpNode(const SUnit &SU) const {
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 1ba1fd65b8c9..ae42a870ea2f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -265,7 +265,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() {
//===----------------------------------------------------------------------===//
/// createVLIWDAGScheduler - This creates a top-down list scheduler.
-ScheduleDAGSDNodes *
-llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ScheduleDAGSDNodes *llvm::createVLIWDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOptLevel) {
return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS));
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 5c1b19eba1c1..5be1892a44f6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -161,8 +162,13 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
unsigned SplatBitSize;
bool HasUndefs;
unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
+ // Endianness does not matter here. We are checking for a splat given the
+ // element size of the vector, and if we find such a splat for little endian
+ // layout, then that should be valid also for big endian (as the full vector
+ // size is known to be a multiple of the element size).
+ const bool IsBigEndian = false;
return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs,
- EltSize) &&
+ EltSize, IsBigEndian) &&
EltSize == SplatBitSize;
}
@@ -344,12 +350,13 @@ bool ISD::isFreezeUndef(const SDNode *N) {
return N->getOpcode() == ISD::FREEZE && N->getOperand(0).isUndef();
}
-bool ISD::matchUnaryPredicate(SDValue Op,
- std::function<bool(ConstantSDNode *)> Match,
- bool AllowUndefs) {
+template <typename ConstNodeType>
+bool ISD::matchUnaryPredicateImpl(SDValue Op,
+ std::function<bool(ConstNodeType *)> Match,
+ bool AllowUndefs) {
// FIXME: Add support for scalar UNDEF cases?
- if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
- return Match(Cst);
+ if (auto *C = dyn_cast<ConstNodeType>(Op))
+ return Match(C);
// FIXME: Add support for vector UNDEF cases?
if (ISD::BUILD_VECTOR != Op.getOpcode() &&
@@ -364,12 +371,17 @@ bool ISD::matchUnaryPredicate(SDValue Op,
continue;
}
- auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
+ auto *Cst = dyn_cast<ConstNodeType>(Op.getOperand(i));
if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
return false;
}
return true;
}
+// Explicitly instantiate the template for the constant node types in use.
+template bool ISD::matchUnaryPredicateImpl<ConstantSDNode>(
+ SDValue, std::function<bool(ConstantSDNode *)>, bool);
+template bool ISD::matchUnaryPredicateImpl<ConstantFPSDNode>(
+ SDValue, std::function<bool(ConstantFPSDNode *)>, bool);
bool ISD::matchBinaryPredicate(
SDValue LHS, SDValue RHS,
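
Turning matchUnaryPredicate into matchUnaryPredicateImpl lets one traversal serve both ConstantSDNode and ConstantFPSDNode, with explicit instantiations keeping the template body in the .cpp file. A self-contained sketch of that pattern (names are illustrative):

    #include <cassert>
    #include <functional>
    #include <vector>

    // Define the matcher once as a template...
    template <typename T>
    bool matchAll(const std::vector<T> &Ops,
                  std::function<bool(const T &)> Match) {
      for (const T &Op : Ops)
        if (!Match(Op))
          return false;
      return true;
    }

    // ...then explicitly instantiate the element types callers may use,
    // so the definition can stay out of the header.
    template bool matchAll<int>(const std::vector<int> &,
                                std::function<bool(const int &)>);
    template bool matchAll<double>(const std::vector<double> &,
                                   std::function<bool(const double &)>);

    int main() {
      std::vector<int> V{2, 4, 8};
      assert(matchAll<int>(V, [](const int &X) { return X % 2 == 0; }));
    }
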
@@ -951,7 +963,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
/// doNotCSE - Return true if CSE should not be performed for this node.
static bool doNotCSE(SDNode *N) {
if (N->getValueType(0) == MVT::Glue)
- return true; // Never CSE anything that produces a flag.
+ return true; // Never CSE anything that produces a glue result.
switch (N->getOpcode()) {
default: break;
@@ -963,7 +975,7 @@ static bool doNotCSE(SDNode *N) {
// Check that remaining values produced are not flags.
for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
if (N->getValueType(i) == MVT::Glue)
- return true; // Never CSE anything that produces a flag.
+ return true; // Never CSE anything that produces a glue result.
return false;
}
@@ -1197,7 +1209,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
}
#ifndef NDEBUG
// Verify that the node was actually in one of the CSE maps, unless it has a
- // flag result (which cannot be CSE'd) or is one of the special cases that are
+ // glue result (which cannot be CSE'd) or is one of the special cases that are
// not subject to CSE.
if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue &&
!N->isMachineOpcode() && !doNotCSE(N)) {
@@ -1296,17 +1308,16 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
}
Align SelectionDAG::getEVTAlign(EVT VT) const {
- Type *Ty = VT == MVT::iPTR ?
- PointerType::get(Type::getInt8Ty(*getContext()), 0) :
- VT.getTypeForEVT(*getContext());
+ Type *Ty = VT == MVT::iPTR ? PointerType::get(*getContext(), 0)
+ : VT.getTypeForEVT(*getContext());
return getDataLayout().getABITypeAlign(Ty);
}
// EntryNode could meaningfully have debug info if we can find it...
-SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
- : TM(tm), OptLevel(OL),
- EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other, MVT::Glue)),
+SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOptLevel OL)
+ : TM(tm), OptLevel(OL), EntryNode(ISD::EntryToken, 0, DebugLoc(),
+ getVTList(MVT::Other, MVT::Glue)),
Root(getEntryNode()) {
InsertNode(&EntryNode);
DbgInfo = new SDDbgInfo();
@@ -1454,6 +1465,51 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
getNode(ISD::TRUNCATE, DL, VT, Op);
}
+SDValue SelectionDAG::getBitcastedAnyExtOrTrunc(SDValue Op, const SDLoc &DL,
+ EVT VT) {
+ assert(!VT.isVector());
+ auto Type = Op.getValueType();
+ SDValue DestOp;
+ if (Type == VT)
+ return Op;
+ auto Size = Op.getValueSizeInBits();
+ DestOp = getBitcast(MVT::getIntegerVT(Size), Op);
+ if (DestOp.getValueType() == VT)
+ return DestOp;
+
+ return getAnyExtOrTrunc(DestOp, DL, VT);
+}
+
+SDValue SelectionDAG::getBitcastedSExtOrTrunc(SDValue Op, const SDLoc &DL,
+ EVT VT) {
+ assert(!VT.isVector());
+ auto Type = Op.getValueType();
+ SDValue DestOp;
+ if (Type == VT)
+ return Op;
+ auto Size = Op.getValueSizeInBits();
+ DestOp = getBitcast(MVT::getIntegerVT(Size), Op);
+ if (DestOp.getValueType() == VT)
+ return DestOp;
+
+ return getSExtOrTrunc(DestOp, DL, VT);
+}
+
+SDValue SelectionDAG::getBitcastedZExtOrTrunc(SDValue Op, const SDLoc &DL,
+ EVT VT) {
+ assert(!VT.isVector());
+ auto Type = Op.getValueType();
+ SDValue DestOp;
+ if (Type == VT)
+ return Op;
+ auto Size = Op.getValueSizeInBits();
+ DestOp = getBitcast(MVT::getIntegerVT(Size), Op);
+ if (DestOp.getValueType() == VT)
+ return DestOp;
+
+ return getZExtOrTrunc(DestOp, DL, VT);
+}
+
SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT,
EVT OpVT) {
if (VT.bitsLE(Op.getValueType()))
@@ -1570,7 +1626,11 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) ==
TargetLowering::TypePromoteInteger) {
EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
- APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
+ APInt NewVal;
+ if (TLI->isSExtCheaperThanZExt(VT.getScalarType(), EltVT))
+ NewVal = Elt->getValue().sextOrTrunc(EltVT.getSizeInBits());
+ else
+ NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
Elt = ConstantInt::get(*getContext(), NewVal);
}
// In other cases the element type is illegal and needs to be expanded, for
@@ -1587,7 +1647,8 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits();
// For scalable vectors, try to use a SPLAT_VECTOR_PARTS node.
- if (VT.isScalableVector()) {
+ if (VT.isScalableVector() ||
+ TLI->isOperationLegal(ISD::SPLAT_VECTOR, VT)) {
assert(EltVT.getSizeInBits() % ViaEltSizeInBits == 0 &&
"Can only handle an even split!");
unsigned Parts = EltVT.getSizeInBits() / ViaEltSizeInBits;
@@ -1801,6 +1862,13 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
return SDValue(N, 0);
}
+SDValue SelectionDAG::getJumpTableDebugInfo(int JTI, SDValue Chain,
+ const SDLoc &DL) {
+ EVT PTy = getTargetLoweringInfo().getPointerTy(getDataLayout());
+ return getNode(ISD::JUMP_TABLE_DEBUG_INFO, DL, MVT::Glue, Chain,
+ getTargetConstant(static_cast<uint64_t>(JTI), DL, PTy, true));
+}
+
SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
MaybeAlign Alignment, int Offset,
bool isTarget, unsigned TargetFlags) {
@@ -1855,23 +1923,6 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
return SDValue(N, 0);
}
-SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
- unsigned TargetFlags) {
- FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), std::nullopt);
- ID.AddInteger(Index);
- ID.AddInteger(Offset);
- ID.AddInteger(TargetFlags);
- void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, IP))
- return SDValue(E, 0);
-
- auto *N = newSDNode<TargetIndexSDNode>(Index, VT, Offset, TargetFlags);
- CSEMap.InsertNode(N, IP);
- InsertNode(N);
- return SDValue(N, 0);
-}
-
SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), std::nullopt);
@@ -1945,15 +1996,15 @@ SDValue SelectionDAG::getVScale(const SDLoc &DL, EVT VT, APInt MulImm,
assert(MulImm.getBitWidth() == VT.getSizeInBits() &&
"APInt size does not match type size!");
+ if (MulImm == 0)
+ return getConstant(0, DL, VT);
+
if (ConstantFold) {
const MachineFunction &MF = getMachineFunction();
- auto Attr = MF.getFunction().getFnAttribute(Attribute::VScaleRange);
- if (Attr.isValid()) {
- unsigned VScaleMin = Attr.getVScaleRangeMin();
- if (std::optional<unsigned> VScaleMax = Attr.getVScaleRangeMax())
- if (*VScaleMax == VScaleMin)
- return getConstant(MulImm * VScaleMin, DL, VT);
- }
+ const Function &F = MF.getFunction();
+ ConstantRange CR = getVScaleRange(&F, 64);
+ if (const APInt *C = CR.getSingleElement())
+ return getConstant(MulImm * C->getZExtValue(), DL, VT);
}
return getNode(ISD::VSCALE, DL, VT, getConstant(MulImm, DL, VT));
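
The rewrite funnels the attribute query through getVScaleRange and folds only when the range collapses to a single element, alongside the new MulImm == 0 early-out. A standalone sketch of the fold's arithmetic (function and parameter names are illustrative):

    #include <cassert>
    #include <cstdint>
    #include <optional>

    // Fold MulImm * vscale to a constant when vscale_range pins vscale to
    // a single value; otherwise report "keep the VSCALE node".
    std::optional<uint64_t> foldVScale(uint64_t MulImm, uint64_t VScaleMin,
                                       std::optional<uint64_t> VScaleMax) {
      if (MulImm == 0)
        return 0; // 0 * vscale is 0 regardless of the range
      if (VScaleMax && *VScaleMax == VScaleMin)
        return MulImm * VScaleMin; // range is a single element
      return std::nullopt;
    }

    int main() {
      assert(foldVScale(16, 2, 2).value() == 32);
      assert(!foldVScale(16, 2, 4).has_value());
    }
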
@@ -2118,11 +2169,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
if (Splat && UndefElements.none()) {
// Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the
// number of elements match or the value splatted is a zero constant.
- if (SameNumElts)
+ if (SameNumElts || isNullConstant(Splat))
return N1;
- if (auto *C = dyn_cast<ConstantSDNode>(Splat))
- if (C->isZero())
- return N1;
}
// If the shuffle itself creates a splat, build the vector directly.
@@ -2487,7 +2535,7 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
// icmp X, X -> true/false
// icmp X, undef -> true/false because undef could be X.
- if (N1 == N2)
+ if (N1.isUndef() || N2.isUndef() || N1 == N2)
return getBoolConstant(ISD::isTrueWhenEqual(Cond), dl, VT, OpVT);
}
@@ -2833,6 +2881,12 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
}
}
+ // Fallback - this is a splat if all demanded elts are the same constant.
+ if (computeKnownBits(V, DemandedElts, Depth).isConstant()) {
+ UndefElts = ~DemandedElts;
+ return true;
+ }
+
return false;
}
@@ -3054,6 +3108,15 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = computeKnownBits(SrcOp, Depth + 1).trunc(BitWidth);
break;
}
+ case ISD::SPLAT_VECTOR_PARTS: {
+ unsigned ScalarSize = Op.getOperand(0).getScalarValueSizeInBits();
+ assert(ScalarSize * Op.getNumOperands() == BitWidth &&
+ "Expected SPLAT_VECTOR_PARTS scalars to cover element width");
+ for (auto [I, SrcOp] : enumerate(Op->ops())) {
+ Known.insertBits(computeKnownBits(SrcOp, Depth + 1), ScalarSize * I);
+ }
+ break;
+ }
case ISD::BUILD_VECTOR:
assert(!Op.getValueType().isScalableVector());
// Collect the known bits that are shared by every demanded vector element.
@@ -3685,14 +3748,19 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
assert(Op.getResNo() == 0 &&
"We only compute knownbits for the difference here.");
- // TODO: Compute influence of the carry operand.
- if (Opcode == ISD::USUBO_CARRY || Opcode == ISD::SSUBO_CARRY)
- break;
+ // With USUBO_CARRY and SSUBO_CARRY a borrow bit may be added in.
+ KnownBits Borrow(1);
+ if (Opcode == ISD::USUBO_CARRY || Opcode == ISD::SSUBO_CARRY) {
+ Borrow = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+ // Borrow has bit width 1
+ Borrow = Borrow.trunc(1);
+ } else {
+ Borrow.setAllZero();
+ }
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false,
- Known, Known2);
+ Known = KnownBits::computeForSubBorrow(Known, Known2, Borrow);
break;
}
case ISD::UADDO:
@@ -3717,15 +3785,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (Opcode == ISD::ADDE)
// Can't track carry from glue, set carry to unknown.
Carry.resetAll();
- else if (Opcode == ISD::UADDO_CARRY || Opcode == ISD::SADDO_CARRY)
- // TODO: Compute known bits for the carry operand. Not sure if it is worth
- // the trouble (how often will we find a known carry bit). And I haven't
- // tested this very much yet, but something like this might work:
- // Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
- // Carry = Carry.zextOrTrunc(1, false);
- Carry.resetAll();
- else
+ else if (Opcode == ISD::UADDO_CARRY || Opcode == ISD::SADDO_CARRY) {
+ Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+ // Carry has bit width 1
+ Carry = Carry.trunc(1);
+ } else {
Carry.setAllZero();
+ }
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -4044,8 +4110,11 @@ SelectionDAG::computeOverflowForSignedSub(SDValue N0, SDValue N1) const {
if (ComputeNumSignBits(N0) > 1 && ComputeNumSignBits(N1) > 1)
return OFK_Never;
- // TODO: Add ConstantRange::signedSubMayOverflow handling.
- return OFK_Sometime;
+ KnownBits N0Known = computeKnownBits(N0);
+ KnownBits N1Known = computeKnownBits(N1);
+ ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, true);
+ ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, true);
+ return mapOverflowResult(N0Range.signedSubMayOverflow(N1Range));
}
SelectionDAG::OverflowKind
@@ -4054,7 +4123,53 @@ SelectionDAG::computeOverflowForUnsignedSub(SDValue N0, SDValue N1) const {
if (isNullConstant(N1))
return OFK_Never;
- // TODO: Add ConstantRange::unsignedSubMayOverflow handling.
+ KnownBits N0Known = computeKnownBits(N0);
+ KnownBits N1Known = computeKnownBits(N1);
+ ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false);
+ ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false);
+ return mapOverflowResult(N0Range.unsignedSubMayOverflow(N1Range));
+}
+
+SelectionDAG::OverflowKind
+SelectionDAG::computeOverflowForUnsignedMul(SDValue N0, SDValue N1) const {
+ // X * 0 and X * 1 never overflow.
+ if (isNullConstant(N1) || isOneConstant(N1))
+ return OFK_Never;
+
+ KnownBits N0Known = computeKnownBits(N0);
+ KnownBits N1Known = computeKnownBits(N1);
+ ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false);
+ ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false);
+ return mapOverflowResult(N0Range.unsignedMulMayOverflow(N1Range));
+}
+
+SelectionDAG::OverflowKind
+SelectionDAG::computeOverflowForSignedMul(SDValue N0, SDValue N1) const {
+ // X * 0 and X * 1 never overflow.
+ if (isNullConstant(N1) || isOneConstant(N1))
+ return OFK_Never;
+
+ // Get the size of the result.
+ unsigned BitWidth = N0.getScalarValueSizeInBits();
+
+ // Sum of the sign bits.
+ unsigned SignBits = ComputeNumSignBits(N0) + ComputeNumSignBits(N1);
+
+ // If we have enough sign bits, then there's no overflow.
+ if (SignBits > BitWidth + 1)
+ return OFK_Never;
+
+ if (SignBits == BitWidth + 1) {
+ // The overflow occurs when the true (double-width) product of the
+ // operands is the minimum negative number.
+ KnownBits N0Known = computeKnownBits(N0);
+ KnownBits N1Known = computeKnownBits(N1);
+ // If one of the operands is non-negative, then there's no
+ // overflow.
+ if (N0Known.isNonNegative() || N1Known.isNonNegative())
+ return OFK_Never;
+ }
+
return OFK_Sometime;
}
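
The sign-bit test rests on a counting argument: a value with s redundant sign bits has only n - s + 1 significant bits, so the true product of two such values needs at most 2n - s0 - s1 + 1 bits and cannot overflow n bits once s0 + s1 > n + 1. A runnable sketch for i32 (helper names are illustrative; assumes arithmetic right shift of signed values):

    #include <cassert>
    #include <cstdint>

    // Count how many copies of the sign bit sit at the top of V.
    unsigned numSignBits(int32_t V) {
      unsigned N = 1;
      while (N < 32 && ((V >> 31) & 1) == ((V >> (31 - N)) & 1))
        ++N;
      return N;
    }

    // Mirror of the OFK_Never condition: enough combined sign bits means
    // the full product still fits in 32 bits.
    bool signedMulNeverOverflows(int32_t A, int32_t B) {
      return numSignBits(A) + numSignBits(B) > 32 + 1;
    }

    int main() {
      assert(signedMulNeverOverflows(3, 5)); // small values, many sign bits
      assert(!signedMulNeverOverflows(1 << 20, 1 << 20)); // needs 41 bits
    }
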
@@ -4066,8 +4181,10 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
unsigned BitWidth = OpVT.getScalarSizeInBits();
// Is the constant a known power of 2?
- if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val))
- return Const->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
+ if (ISD::matchUnaryPredicate(Val, [BitWidth](ConstantSDNode *C) {
+ return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
+ }))
+ return true;
// A left-shift of a constant one will have exactly one bit set because
// shifting the bit off the end is undefined.
@@ -4075,6 +4192,8 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
auto *C = isConstOrConstSplat(Val.getOperand(0));
if (C && C->getAPIntValue() == 1)
return true;
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1) &&
+ isKnownNeverZero(Val, Depth);
}
// Similarly, a logical right-shift of a constant sign-bit will have exactly
@@ -4083,8 +4202,13 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
auto *C = isConstOrConstSplat(Val.getOperand(0));
if (C && C->getAPIntValue().isSignMask())
return true;
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1) &&
+ isKnownNeverZero(Val, Depth);
}
+ if (Val.getOpcode() == ISD::ROTL || Val.getOpcode() == ISD::ROTR)
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);
+
// Are all operands of a build vector constant powers of two?
if (Val.getOpcode() == ISD::BUILD_VECTOR)
if (llvm::all_of(Val->ops(), [BitWidth](SDValue E) {
@@ -4106,6 +4230,34 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1))
return true;
+ if (Val.getOpcode() == ISD::SMIN || Val.getOpcode() == ISD::SMAX ||
+ Val.getOpcode() == ISD::UMIN || Val.getOpcode() == ISD::UMAX)
+ return isKnownToBeAPowerOfTwo(Val.getOperand(1), Depth + 1) &&
+ isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);
+
+ if (Val.getOpcode() == ISD::SELECT || Val.getOpcode() == ISD::VSELECT)
+ return isKnownToBeAPowerOfTwo(Val.getOperand(2), Depth + 1) &&
+ isKnownToBeAPowerOfTwo(Val.getOperand(1), Depth + 1);
+
+ if (Val.getOpcode() == ISD::AND) {
+ // Looking for `x & -x` pattern:
+ // If x == 0:
+ // x & -x -> 0
+ // If x != 0:
+ // x & -x -> non-zero pow2
+ // so if we find the pattern return whether we know `x` is non-zero.
+ for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) {
+ SDValue NegOp = Val.getOperand(OpIdx);
+ if (NegOp.getOpcode() == ISD::SUB &&
+ NegOp.getOperand(1) == Val.getOperand(1 - OpIdx) &&
+ isNullOrNullSplat(NegOp.getOperand(0)))
+ return isKnownNeverZero(Val.getOperand(1 - OpIdx), Depth);
+ }
+ }
+
+ if (Val.getOpcode() == ISD::ZERO_EXTEND)
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1);
+
// More could be done here, though the above checks are enough
// to handle some common cases.
return false;
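
The new AND case leans on the classic `x & -x` identity: for non-zero x it isolates the lowest set bit (a power of two), while for zero it yields zero, so proving x non-zero is exactly the missing piece. A quick exhaustive check of the underlying fact, with no LLVM types:

    #include <cassert>
    #include <cstdint>

    bool isPow2(uint32_t V) { return V != 0 && (V & (V - 1)) == 0; }

    int main() {
      for (uint32_t X = 1; X < 100000; ++X)
        assert(isPow2(X & -X)); // lowest set bit of a non-zero value
      uint32_t Z = 0;
      assert((Z & -Z) == 0);    // the x == 0 exception
    }
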
@@ -4866,8 +5018,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
- case ISD::AssertSext:
- case ISD::AssertZext:
case ISD::FREEZE:
case ISD::CONCAT_VECTORS:
case ISD::INSERT_SUBVECTOR:
@@ -4883,7 +5033,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::BITREVERSE:
case ISD::PARITY:
case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
case ISD::TRUNCATE:
case ISD::SIGN_EXTEND_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
@@ -4893,6 +5042,10 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::BUILD_PAIR:
return false;
+ // Matches hasPoisonGeneratingFlags().
+ case ISD::ZERO_EXTEND:
+ return ConsiderFlags && Op->getFlags().hasNonNeg();
+
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
@@ -4929,6 +5082,15 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
return true;
}
+bool SelectionDAG::isADDLike(SDValue Op) const {
+ unsigned Opcode = Op.getOpcode();
+ if (Opcode == ISD::OR)
+ return haveNoCommonBitsSet(Op.getOperand(0), Op.getOperand(1));
+ if (Opcode == ISD::XOR)
+ return isMinSignedConstant(Op.getOperand(1));
+ return false;
+}
+
bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
!isa<ConstantSDNode>(Op.getOperand(1)))
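
isADDLike names the two rewrites combines rely on: an OR whose operands share no set bits carries nothing, so it equals ADD, and XOR with the minimum signed constant flips only the sign bit, which adding that constant does too (any carry falls off the top). A brute-force check of both facts on i8 (standalone sketch):

    #include <cassert>

    int main() {
      for (unsigned A = 0; A < 256; ++A) {
        for (unsigned B = 0; B < 256; ++B)
          if ((A & B) == 0) // disjoint bits: OR and ADD coincide
            assert(((A | B) & 0xFF) == ((A + B) & 0xFF));
        // 0x80 is the i8 minimum signed constant: XOR and ADD coincide.
        assert(((A ^ 0x80) & 0xFF) == ((A + 0x80) & 0xFF));
      }
    }
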
@@ -4974,12 +5136,15 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FCANONICALIZE:
case ISD::FEXP:
case ISD::FEXP2:
+ case ISD::FEXP10:
case ISD::FTRUNC:
case ISD::FFLOOR:
case ISD::FCEIL:
case ISD::FROUND:
case ISD::FROUNDEVEN:
case ISD::FRINT:
+ case ISD::LRINT:
+ case ISD::LLRINT:
case ISD::FNEARBYINT:
case ISD::FLDEXP: {
if (SNaN)
@@ -5109,21 +5274,29 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
isKnownNeverZero(Op.getOperand(2), Depth + 1);
- case ISD::SHL:
+ case ISD::SHL: {
if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap())
return isKnownNeverZero(Op.getOperand(0), Depth + 1);
-
- // 1 << X is never zero. TODO: This can be expanded if we can bound X.
- // The expression is really !Known.One[BitWidth-MaxLog2(Known):0].isZero()
- if (computeKnownBits(Op.getOperand(0), Depth + 1).One[0])
+ KnownBits ValKnown = computeKnownBits(Op.getOperand(0), Depth + 1);
+ // 1 << X is never zero.
+ if (ValKnown.One[0])
+ return true;
+ // If the known-one bits survive even the maximum shift amount, the
+ // result is non-zero.
+ APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue();
+ if (MaxCnt.ult(ValKnown.getBitWidth()) &&
+ !ValKnown.One.shl(MaxCnt).isZero())
return true;
break;
-
+ }
case ISD::UADDSAT:
case ISD::UMAX:
return isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ // TODO for smin/smax: If either operand is known negative (for smin) or
+ // positive (for smax), we don't need the other to be known at all.
+ case ISD::SMAX:
+ case ISD::SMIN:
case ISD::UMIN:
return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
isKnownNeverZero(Op.getOperand(0), Depth + 1);
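
The SHL case generalises the old "1 << X is never zero" check: if the value's known-one bits survive even the largest shift amount the operand can take, no feasible shift zeroes the result (the same idea, mirrored with lshr, appears in the SRA/SRL hunk below). A standalone sketch of the test:

    #include <cassert>
    #include <cstdint>

    // Known ones shifted by the *maximum* amount still non-zero implies
    // every smaller shift also leaves a one bit in range.
    bool shlKnownNonZero(uint32_t KnownOne, uint32_t MaxShiftAmt) {
      return MaxShiftAmt < 32 && (KnownOne << MaxShiftAmt) != 0;
    }

    int main() {
      assert(shlKnownNonZero(0x1, 3));          // bit 0 survives <= 3
      assert(!shlKnownNonZero(0x80000000u, 1)); // top bit shifts out
    }
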
@@ -5137,16 +5310,19 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
return isKnownNeverZero(Op.getOperand(0), Depth + 1);
case ISD::SRA:
- case ISD::SRL:
+ case ISD::SRL: {
if (Op->getFlags().hasExact())
return isKnownNeverZero(Op.getOperand(0), Depth + 1);
- // Signed >> X is never zero. TODO: This can be expanded if we can bound X.
- // The expression is really
- // !Known.One[SignBit:SignBit-(BitWidth-MaxLog2(Known))].isZero()
- if (computeKnownBits(Op.getOperand(0), Depth + 1).isNegative())
+ KnownBits ValKnown = computeKnownBits(Op.getOperand(0), Depth + 1);
+ if (ValKnown.isNegative())
+ return true;
+ // If the known-one bits survive even the maximum shift amount, the
+ // result is non-zero.
+ APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue();
+ if (MaxCnt.ult(ValKnown.getBitWidth()) &&
+ !ValKnown.One.lshr(MaxCnt).isZero())
return true;
break;
-
+ }
case ISD::UDIV:
case ISD::SDIV:
// div exact can only produce a zero if the dividend is zero.
@@ -5422,161 +5598,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, const SDNodeFlags Flags) {
assert(N1.getOpcode() != ISD::DELETED_NODE && "Operand is DELETED_NODE!");
- // Constant fold unary operations with an integer constant operand. Even
- // opaque constant will be folded, because the folding of unary operations
- // doesn't create new constants with different values. Nevertheless, the
- // opaque flag is preserved during folding to prevent future folding with
- // other constants.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
- const APInt &Val = C->getAPIntValue();
- switch (Opcode) {
- default: break;
- case ISD::SIGN_EXTEND:
- return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
- C->isTargetOpcode(), C->isOpaque());
- case ISD::TRUNCATE:
- if (C->isOpaque())
- break;
- [[fallthrough]];
- case ISD::ZERO_EXTEND:
- return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
- C->isTargetOpcode(), C->isOpaque());
- case ISD::ANY_EXTEND:
- // Some targets like RISCV prefer to sign extend some types.
- if (TLI->isSExtCheaperThanZExt(N1.getValueType(), VT))
- return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
- C->isTargetOpcode(), C->isOpaque());
- return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
- C->isTargetOpcode(), C->isOpaque());
- case ISD::UINT_TO_FP:
- case ISD::SINT_TO_FP: {
- APFloat apf(EVTToAPFloatSemantics(VT),
- APInt::getZero(VT.getSizeInBits()));
- (void)apf.convertFromAPInt(Val,
- Opcode==ISD::SINT_TO_FP,
- APFloat::rmNearestTiesToEven);
- return getConstantFP(apf, DL, VT);
- }
- case ISD::BITCAST:
- if (VT == MVT::f16 && C->getValueType(0) == MVT::i16)
- return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT);
- if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
- return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT);
- if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
- return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT);
- if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
- return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
- break;
- case ISD::ABS:
- return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(),
- C->isOpaque());
- case ISD::BITREVERSE:
- return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(),
- C->isOpaque());
- case ISD::BSWAP:
- return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
- C->isOpaque());
- case ISD::CTPOP:
- return getConstant(Val.popcount(), DL, VT, C->isTargetOpcode(),
- C->isOpaque());
- case ISD::CTLZ:
- case ISD::CTLZ_ZERO_UNDEF:
- return getConstant(Val.countl_zero(), DL, VT, C->isTargetOpcode(),
- C->isOpaque());
- case ISD::CTTZ:
- case ISD::CTTZ_ZERO_UNDEF:
- return getConstant(Val.countr_zero(), DL, VT, C->isTargetOpcode(),
- C->isOpaque());
- case ISD::FP16_TO_FP:
- case ISD::BF16_TO_FP: {
- bool Ignored;
- APFloat FPV(Opcode == ISD::FP16_TO_FP ? APFloat::IEEEhalf()
- : APFloat::BFloat(),
- (Val.getBitWidth() == 16) ? Val : Val.trunc(16));
-
- // This can return overflow, underflow, or inexact; we don't care.
- // FIXME need to be more flexible about rounding mode.
- (void)FPV.convert(EVTToAPFloatSemantics(VT),
- APFloat::rmNearestTiesToEven, &Ignored);
- return getConstantFP(FPV, DL, VT);
- }
- case ISD::STEP_VECTOR: {
- if (SDValue V = FoldSTEP_VECTOR(DL, VT, N1, *this))
- return V;
- break;
- }
- }
- }
-
- // Constant fold unary operations with a floating point constant operand.
- if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N1)) {
- APFloat V = C->getValueAPF(); // make copy
- switch (Opcode) {
- case ISD::FNEG:
- V.changeSign();
- return getConstantFP(V, DL, VT);
- case ISD::FABS:
- V.clearSign();
- return getConstantFP(V, DL, VT);
- case ISD::FCEIL: {
- APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
- if (fs == APFloat::opOK || fs == APFloat::opInexact)
- return getConstantFP(V, DL, VT);
- break;
- }
- case ISD::FTRUNC: {
- APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
- if (fs == APFloat::opOK || fs == APFloat::opInexact)
- return getConstantFP(V, DL, VT);
- break;
- }
- case ISD::FFLOOR: {
- APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
- if (fs == APFloat::opOK || fs == APFloat::opInexact)
- return getConstantFP(V, DL, VT);
- break;
- }
- case ISD::FP_EXTEND: {
- bool ignored;
- // This can return overflow, underflow, or inexact; we don't care.
- // FIXME need to be more flexible about rounding mode.
- (void)V.convert(EVTToAPFloatSemantics(VT),
- APFloat::rmNearestTiesToEven, &ignored);
- return getConstantFP(V, DL, VT);
- }
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT: {
- bool ignored;
- APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT);
- // FIXME need to be more flexible about rounding mode.
- APFloat::opStatus s =
- V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored);
- if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual
- break;
- return getConstant(IntVal, DL, VT);
- }
- case ISD::BITCAST:
- if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
- return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
- if (VT == MVT::i16 && C->getValueType(0) == MVT::bf16)
- return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
- if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
- return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
- if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
- return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
- break;
- case ISD::FP_TO_FP16:
- case ISD::FP_TO_BF16: {
- bool Ignored;
- // This can return overflow, underflow, or inexact; we don't care.
- // FIXME need to be more flexible about rounding mode.
- (void)V.convert(Opcode == ISD::FP_TO_FP16 ? APFloat::IEEEhalf()
- : APFloat::BFloat(),
- APFloat::rmNearestTiesToEven, &Ignored);
- return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
- }
- }
- }
// Constant fold unary operations with a vector integer or float operand.
switch (Opcode) {
@@ -5592,12 +5613,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::FP_TO_FP16:
+ case ISD::FP_TO_BF16:
case ISD::TRUNCATE:
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP:
+ case ISD::FP16_TO_FP:
+ case ISD::BF16_TO_FP:
+ case ISD::BITCAST:
case ISD::ABS:
case ISD::BITREVERSE:
case ISD::BSWAP:
@@ -5605,7 +5631,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- case ISD::CTPOP: {
+ case ISD::CTPOP:
+ case ISD::STEP_VECTOR: {
SDValue Ops = {N1};
if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops))
return Fold;
@@ -5694,6 +5721,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (OpOpcode == ISD::UNDEF)
// zext(undef) = 0, because the top bits will be zero.
return getConstant(0, DL, VT);
+
+ // Skip unnecessary zext_inreg pattern:
+ // (zext (trunc x)) -> x iff the upper bits are known zero.
+ // TODO: Remove (zext (trunc (and x, c))) exception which some targets
+ // use to recognise zext_inreg patterns.
+ if (OpOpcode == ISD::TRUNCATE) {
+ SDValue OpOp = N1.getOperand(0);
+ if (OpOp.getValueType() == VT) {
+ if (OpOp.getOpcode() != ISD::AND) {
+ APInt HiBits = APInt::getBitsSetFrom(VT.getScalarSizeInBits(),
+ N1.getScalarValueSizeInBits());
+ if (MaskedValueIsZero(OpOp, HiBits)) {
+ transferDbgValues(N1, OpOp);
+ return OpOp;
+ }
+ }
+ }
+ }
break;
case ISD::ANY_EXTEND:
assert(VT.isInteger() && N1.getValueType().isInteger() &&
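
The added fold removes a (zext (trunc x)) round trip when MaskedValueIsZero proves the bits above the truncation width are already clear, transferring any debug values to the surviving node. A concrete-width sketch of the identity being exploited:

    #include <cassert>
    #include <cstdint>

    // zext.i32(trunc.i16(x)) modelled with plain casts.
    uint32_t zextOfTrunc(uint32_t X) { return static_cast<uint16_t>(X); }

    int main() {
      // When the upper 16 bits are known zero the round trip is the
      // identity, so both casts can be dropped...
      for (uint32_t X = 0; X <= 0xFFFF; ++X)
        assert(zextOfTrunc(X) == X);
      // ...and when they aren't, it isn't, which is why the fold is
      // gated on MaskedValueIsZero.
      assert(zextOfTrunc(0x12340001u) != 0x12340001u);
    }
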
@@ -5850,7 +5895,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDNode *N;
SDVTList VTs = getVTList(VT);
SDValue Ops[] = {N1};
- if (VT != MVT::Glue) { // Don't CSE flag producing nodes
+ if (VT != MVT::Glue) { // Don't CSE glue producing nodes
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
@@ -6037,9 +6082,174 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (isUndef(Opcode, Ops))
return getUNDEF(VT);
+ // Handle unary special cases.
+ if (NumOps == 1) {
+ SDValue N1 = Ops[0];
+
+ // Constant fold unary operations with an integer constant operand. Even
+ // opaque constant will be folded, because the folding of unary operations
+ // doesn't create new constants with different values. Nevertheless, the
+ // opaque flag is preserved during folding to prevent future folding with
+ // other constants.
+ if (auto *C = dyn_cast<ConstantSDNode>(N1)) {
+ const APInt &Val = C->getAPIntValue();
+ switch (Opcode) {
+ case ISD::SIGN_EXTEND:
+ return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
+ case ISD::TRUNCATE:
+ if (C->isOpaque())
+ break;
+ [[fallthrough]];
+ case ISD::ZERO_EXTEND:
+ return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
+ case ISD::ANY_EXTEND:
+ // Some targets like RISCV prefer to sign extend some types.
+ if (TLI->isSExtCheaperThanZExt(N1.getValueType(), VT))
+ return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
+ return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
+ case ISD::ABS:
+ return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::BITREVERSE:
+ return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::BSWAP:
+ return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::CTPOP:
+ return getConstant(Val.popcount(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ return getConstant(Val.countl_zero(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ return getConstant(Val.countr_zero(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: {
+ APFloat apf(EVTToAPFloatSemantics(VT),
+ APInt::getZero(VT.getSizeInBits()));
+ (void)apf.convertFromAPInt(Val, Opcode == ISD::SINT_TO_FP,
+ APFloat::rmNearestTiesToEven);
+ return getConstantFP(apf, DL, VT);
+ }
+ case ISD::FP16_TO_FP:
+ case ISD::BF16_TO_FP: {
+ bool Ignored;
+ APFloat FPV(Opcode == ISD::FP16_TO_FP ? APFloat::IEEEhalf()
+ : APFloat::BFloat(),
+ (Val.getBitWidth() == 16) ? Val : Val.trunc(16));
+
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)FPV.convert(EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &Ignored);
+ return getConstantFP(FPV, DL, VT);
+ }
+ case ISD::STEP_VECTOR:
+ if (SDValue V = FoldSTEP_VECTOR(DL, VT, N1, *this))
+ return V;
+ break;
+ case ISD::BITCAST:
+ if (VT == MVT::f16 && C->getValueType(0) == MVT::i16)
+ return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT);
+ if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
+ return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT);
+ if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+ return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT);
+ if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
+ return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
+ break;
+ }
+ }
+
+ // Constant fold unary operations with a floating point constant operand.
+ if (auto *C = dyn_cast<ConstantFPSDNode>(N1)) {
+ APFloat V = C->getValueAPF(); // make copy
+ switch (Opcode) {
+ case ISD::FNEG:
+ V.changeSign();
+ return getConstantFP(V, DL, VT);
+ case ISD::FABS:
+ V.clearSign();
+ return getConstantFP(V, DL, VT);
+ case ISD::FCEIL: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, DL, VT);
+ return SDValue();
+ }
+ case ISD::FTRUNC: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, DL, VT);
+ return SDValue();
+ }
+ case ISD::FFLOOR: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, DL, VT);
+ return SDValue();
+ }
+ case ISD::FP_EXTEND: {
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+ &ignored);
+ return getConstantFP(V, DL, VT);
+ }
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: {
+ bool ignored;
+ APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT);
+ // FIXME need to be more flexible about rounding mode.
+ APFloat::opStatus s =
+ V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored);
+ if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual
+ break;
+ return getConstant(IntVal, DL, VT);
+ }
+ case ISD::FP_TO_FP16:
+ case ISD::FP_TO_BF16: {
+ bool Ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(Opcode == ISD::FP_TO_FP16 ? APFloat::IEEEhalf()
+ : APFloat::BFloat(),
+ APFloat::rmNearestTiesToEven, &Ignored);
+ return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
+ }
+ case ISD::BITCAST:
+ if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
+ return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL,
+ VT);
+ if (VT == MVT::i16 && C->getValueType(0) == MVT::bf16)
+ return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL,
+ VT);
+ if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+ return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL,
+ VT);
+ if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+ return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
+ break;
+ }
+ }
+
+ // Early-out if we failed to constant fold a bitcast.
+ if (Opcode == ISD::BITCAST)
+ return SDValue();
+ }
+
// Handle binops special cases.
if (NumOps == 2) {
- if (SDValue CFP = foldConstantFPMath(Opcode, DL, VT, Ops[0], Ops[1]))
+ if (SDValue CFP = foldConstantFPMath(Opcode, DL, VT, Ops))
return CFP;
if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) {
@@ -6232,11 +6442,17 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
}
SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
- EVT VT, SDValue N1, SDValue N2) {
+ EVT VT, ArrayRef<SDValue> Ops) {
+ // TODO: Add support for unary/ternary fp opcodes.
+ if (Ops.size() != 2)
+ return SDValue();
+
// TODO: We don't do any constant folding for strict FP opcodes here, but we
// should. That will require dealing with a potentially non-default
// rounding mode, checking the "opStatus" return value from the APFloat
// math calculations, and possibly other variations.
+ SDValue N1 = Ops[0];
+ SDValue N2 = Ops[1];
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, /*AllowUndefs*/ false);
ConstantFPSDNode *N2CFP = isConstOrConstSplatFP(N2, /*AllowUndefs*/ false);
if (N1CFP && N2CFP) {
@@ -6597,6 +6813,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
return getBuildVector(VT, DL, Ops);
}
+
+ if (N1.getOpcode() == ISD::SPLAT_VECTOR &&
+ isa<ConstantSDNode>(N1.getOperand(0)))
+ return getNode(
+ ISD::SPLAT_VECTOR, DL, VT,
+ SignExtendInReg(N1.getConstantOperandAPInt(0),
+ N1.getOperand(0).getValueType()));
break;
}
case ISD::FP_TO_SINT_SAT:
@@ -6865,7 +7088,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"Operand is DELETED_NODE!");
// Perform various simplifications.
switch (Opcode) {
- case ISD::FMA: {
+ case ISD::FMA:
+ case ISD::FMAD: {
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
assert(N1.getValueType() == VT && N2.getValueType() == VT &&
N3.getValueType() == VT && "FMA types must match!");
@@ -6876,7 +7100,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
APFloat V1 = N1CFP->getValueAPF();
const APFloat &V2 = N2CFP->getValueAPF();
const APFloat &V3 = N3CFP->getValueAPF();
- V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
+ if (Opcode == ISD::FMAD) {
+ V1.multiply(V2, APFloat::rmNearestTiesToEven);
+ V1.add(V3, APFloat::rmNearestTiesToEven);
+ } else
+ V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
return getConstantFP(V1, DL, VT);
}
break;
@@ -6998,7 +7226,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
}
- // Memoize node if it doesn't produce a flag.
+ // Memoize node if it doesn't produce a glue result.
SDNode *N;
SDVTList VTs = getVTList(VT);
SDValue Ops[] = {N1, N2, N3};
@@ -7339,7 +7567,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (Value.getNode()) {
Store = DAG.getStore(
Chain, dl, Value,
- DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo);
OutChains.push_back(Store);
}
@@ -7364,14 +7592,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
Value = DAG.getExtLoad(
ISD::EXTLOAD, dl, NVT, Chain,
- DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl),
+ DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl),
SrcPtrInfo.getWithOffset(SrcOff), VT,
commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, NewAAInfo);
OutLoadChains.push_back(Value.getValue(1));
Store = DAG.getTruncStore(
Chain, dl, Value,
- DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, NewAAInfo);
OutStoreChains.push_back(Store);
}
@@ -7508,7 +7736,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
Value = DAG.getLoad(
VT, dl, Chain,
- DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl),
+ DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl),
SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags, NewAAInfo);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
@@ -7523,7 +7751,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
Store = DAG.getStore(
Chain, dl, LoadValues[i],
- DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo);
OutChains.push_back(Store);
DstOff += VTSize;
@@ -7628,19 +7856,34 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
}
// If this store is smaller than the largest store see whether we can get
- // the smaller value for free with a truncate.
+ // the smaller value for free with a truncate or extract vector element and
+ // then store.
SDValue Value = MemSetValue;
if (VT.bitsLT(LargestVT)) {
+ unsigned Index;
+ unsigned NElts = LargestVT.getSizeInBits() / VT.getSizeInBits();
+ EVT SVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), NElts);
if (!LargestVT.isVector() && !VT.isVector() &&
TLI.isTruncateFree(LargestVT, VT))
Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
- else
+ else if (LargestVT.isVector() && !VT.isVector() &&
+ TLI.shallExtractConstSplatVectorElementToStore(
+ LargestVT.getTypeForEVT(*DAG.getContext()),
+ VT.getSizeInBits(), Index) &&
+ TLI.isTypeLegal(SVT) &&
+ LargestVT.getSizeInBits() == SVT.getSizeInBits()) {
+ // A target that can combine store(extractelement VectorTy, Idx) can get
+ // the smaller value for free.
+ SDValue TailValue = DAG.getNode(ISD::BITCAST, dl, SVT, MemSetValue);
+ Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, TailValue,
+ DAG.getVectorIdxConstant(Index, dl));
+ } else
Value = getMemsetValue(Src, VT, DAG, dl);
}
assert(Value.getValueType() == VT && "Value with wrong type.");
SDValue Store = DAG.getStore(
Chain, dl, Value,
- DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl),
DstPtrInfo.getWithOffset(DstOff), Alignment,
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone,
NewAAInfo);
@@ -7714,7 +7957,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = Type::getInt8PtrTy(*getContext());
+ Entry.Ty = PointerType::getUnqual(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
@@ -7816,7 +8059,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = Type::getInt8PtrTy(*getContext());
+ Entry.Ty = PointerType::getUnqual(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
@@ -7930,8 +8173,6 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
// FIXME: pass in SDLoc
CLI.setDebugLoc(dl).setChain(Chain);
- ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src);
- const bool SrcIsZero = ConstantSrc && ConstantSrc->isZero();
const char *BzeroName = getTargetLoweringInfo().getLibcallName(RTLIB::BZERO);
// Helper function to create an Entry from Node and Type.
@@ -7943,16 +8184,16 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
};
// If zeroing out and bzero is present, use it.
- if (SrcIsZero && BzeroName) {
+ if (isNullConstant(Src) && BzeroName) {
TargetLowering::ArgListTy Args;
- Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx)));
+ Args.push_back(CreateEntry(Dst, PointerType::getUnqual(Ctx)));
Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx)));
CLI.setLibCallee(
TLI->getLibcallCallingConv(RTLIB::BZERO), Type::getVoidTy(Ctx),
getExternalSymbol(BzeroName, TLI->getPointerTy(DL)), std::move(Args));
} else {
TargetLowering::ArgListTy Args;
- Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx)));
+ Args.push_back(CreateEntry(Dst, PointerType::getUnqual(Ctx)));
Args.push_back(CreateEntry(Src, Src.getValueType().getTypeForEVT(Ctx)));
Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx)));
CLI.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
@@ -8124,7 +8365,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
(int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
"Opcode is not a memory-accessing opcode!");
- // Memoize the node unless it returns a flag.
+ // Memoize the node unless it returns a glue result.
MemIntrinsicSDNode *N;
if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
@@ -9642,6 +9883,27 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
SDValue ZeroOverFlow = getConstant(0, DL, VTList.VTs[1]);
return getNode(ISD::MERGE_VALUES, DL, VTList, {N1, ZeroOverFlow}, Flags);
}
+
+ if (VTList.VTs[0].isVector() &&
+ VTList.VTs[0].getVectorElementType() == MVT::i1 &&
+ VTList.VTs[1].getVectorElementType() == MVT::i1) {
+ SDValue F1 = getFreeze(N1);
+ SDValue F2 = getFreeze(N2);
+ // {vXi1,vXi1} (u/s)addo(vXi1 x, vXi1 y) -> {xor(x,y),and(x,y)}
+ if (Opcode == ISD::UADDO || Opcode == ISD::SADDO)
+ return getNode(ISD::MERGE_VALUES, DL, VTList,
+ {getNode(ISD::XOR, DL, VTList.VTs[0], F1, F2),
+ getNode(ISD::AND, DL, VTList.VTs[1], F1, F2)},
+ Flags);
+ // {vXi1,vXi1} (u/s)subo(vXi1 x, vXi1 y) -> {xor(x,y),and(~x,y)}
+ if (Opcode == ISD::USUBO || Opcode == ISD::SSUBO) {
+ SDValue NotF1 = getNOT(DL, F1, VTList.VTs[0]);
+ return getNode(ISD::MERGE_VALUES, DL, VTList,
+ {getNode(ISD::XOR, DL, VTList.VTs[0], F1, F2),
+ getNode(ISD::AND, DL, VTList.VTs[1], NotF1, F2)},
+ Flags);
+ }
+ }
break;
}
case ISD::SMUL_LOHI:
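
For one-bit elements the overflow outputs have closed forms, which is what the new expansion emits: the sum/difference is XOR, the carry-out is AND, and the borrow-out is ~x & y (the freezes guard against duplicating a poison operand). A standalone truth-table check:

    #include <cassert>

    int main() {
      for (unsigned X = 0; X <= 1; ++X)
        for (unsigned Y = 0; Y <= 1; ++Y) {
          unsigned Sum = X + Y;
          assert((Sum & 1) == (X ^ Y));  // i1 add is xor
          assert((Sum >> 1) == (X & Y)); // carry out is and
          unsigned Diff = X - Y;         // wraps when X < Y
          assert((Diff & 1) == (X ^ Y)); // i1 sub is also xor
          assert(((X < Y) ? 1u : 0u) == ((~X & Y) & 1)); // borrow out
        }
    }
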
@@ -9651,6 +9913,28 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
VTList.VTs[0] == Ops[0].getValueType() &&
VTList.VTs[0] == Ops[1].getValueType() &&
"Binary operator types must match!");
+ // Constant fold.
+ ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(Ops[0]);
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ops[1]);
+ if (LHS && RHS) {
+ unsigned Width = VTList.VTs[0].getScalarSizeInBits();
+ unsigned OutWidth = Width * 2;
+ APInt Val = LHS->getAPIntValue();
+ APInt Mul = RHS->getAPIntValue();
+ if (Opcode == ISD::SMUL_LOHI) {
+ Val = Val.sext(OutWidth);
+ Mul = Mul.sext(OutWidth);
+ } else {
+ Val = Val.zext(OutWidth);
+ Mul = Mul.zext(OutWidth);
+ }
+ Val *= Mul;
+
+ SDValue Hi =
+ getConstant(Val.extractBits(Width, Width), DL, VTList.VTs[0]);
+ SDValue Lo = getConstant(Val.trunc(Width), DL, VTList.VTs[0]);
+ return getNode(ISD::MERGE_VALUES, DL, VTList, {Lo, Hi}, Flags);
+ }
break;
}
case ISD::FFREXP: {
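
The SMUL_LOHI/UMUL_LOHI fold widens both constants to twice the bit width (sign- or zero-extending by opcode), multiplies once, and splits the product into Lo and Hi halves. The same computation at i32 with plain integers (a sketch, not the APInt code):

    #include <cassert>
    #include <cstdint>

    void umul_lohi32(uint32_t A, uint32_t B, uint32_t &Lo, uint32_t &Hi) {
      uint64_t P = static_cast<uint64_t>(A) * B; // zero-extended product
      Lo = static_cast<uint32_t>(P);             // low half
      Hi = static_cast<uint32_t>(P >> 32);       // high half
    }

    int main() {
      uint32_t Lo, Hi;
      umul_lohi32(0xFFFFFFFFu, 2, Lo, Hi);
      assert(Lo == 0xFFFFFFFEu && Hi == 1);
    }
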
@@ -9724,7 +10008,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
#endif
}
- // Memoize the node unless it returns a flag.
+ // Memoize the node unless it returns a glue result.
SDNode *N;
if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
@@ -10097,7 +10381,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
/// For IROrder, we keep the smaller of the two
SDNode *SelectionDAG::UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &OLoc) {
DebugLoc NLoc = N->getDebugLoc();
- if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) {
+ if (NLoc && OptLevel == CodeGenOptLevel::None && OLoc.getDebugLoc() != NLoc) {
N->setDebugLoc(DebugLoc());
}
unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder());
@@ -10566,11 +10850,18 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
switch (N.getOpcode()) {
default:
break;
- case ISD::ADD:
+ case ISD::ADD: {
SDValue N0 = N.getOperand(0);
SDValue N1 = N.getOperand(1);
- if (!isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1)) {
- uint64_t Offset = N.getConstantOperandVal(1);
+ if (!isa<ConstantSDNode>(N0)) {
+ bool RHSConstant = isa<ConstantSDNode>(N1);
+ uint64_t Offset;
+ if (RHSConstant)
+ Offset = N.getConstantOperandVal(1);
+ // We are not allowed to turn indirect debug values variadic, so
+ // don't salvage those.
+ if (!RHSConstant && DV->isIndirect())
+ continue;
// Rewrite an ADD constant node into a DIExpression. Since we are
// performing arithmetic to compute the variable's *value* in the
@@ -10579,7 +10870,8 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
auto *DIExpr = DV->getExpression();
auto NewLocOps = DV->copyLocationOps();
bool Changed = false;
- for (size_t i = 0; i < NewLocOps.size(); ++i) {
+ size_t OrigLocOpsSize = NewLocOps.size();
+ for (size_t i = 0; i < OrigLocOpsSize; ++i) {
// We're not given a ResNo to compare against because the whole
// node is going away. We know that any ISD::ADD only has one
// result, so we can assume any node match is using the result.
@@ -10587,19 +10879,37 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
NewLocOps[i].getSDNode() != &N)
continue;
NewLocOps[i] = SDDbgOperand::fromNode(N0.getNode(), N0.getResNo());
- SmallVector<uint64_t, 3> ExprOps;
- DIExpression::appendOffset(ExprOps, Offset);
- DIExpr = DIExpression::appendOpsToArg(DIExpr, ExprOps, i, true);
+ if (RHSConstant) {
+ SmallVector<uint64_t, 3> ExprOps;
+ DIExpression::appendOffset(ExprOps, Offset);
+ DIExpr = DIExpression::appendOpsToArg(DIExpr, ExprOps, i, true);
+ } else {
+ // Convert to a variadic expression (if not already).
+ // convertToVariadicExpression() returns a const pointer, so we use
+ // a temporary const variable here.
+ const auto *TmpDIExpr =
+ DIExpression::convertToVariadicExpression(DIExpr);
+ SmallVector<uint64_t, 3> ExprOps;
+ ExprOps.push_back(dwarf::DW_OP_LLVM_arg);
+ ExprOps.push_back(NewLocOps.size());
+ ExprOps.push_back(dwarf::DW_OP_plus);
+ SDDbgOperand RHS =
+ SDDbgOperand::fromNode(N1.getNode(), N1.getResNo());
+ NewLocOps.push_back(RHS);
+ DIExpr = DIExpression::appendOpsToArg(TmpDIExpr, ExprOps, i, true);
+ }
Changed = true;
}
(void)Changed;
assert(Changed && "Salvage target doesn't use N");
+ bool IsVariadic =
+ DV->isVariadic() || OrigLocOpsSize != NewLocOps.size();
+
auto AdditionalDependencies = DV->getAdditionalDependencies();
- SDDbgValue *Clone = getDbgValueList(DV->getVariable(), DIExpr,
- NewLocOps, AdditionalDependencies,
- DV->isIndirect(), DV->getDebugLoc(),
- DV->getOrder(), DV->isVariadic());
+ SDDbgValue *Clone = getDbgValueList(
+ DV->getVariable(), DIExpr, NewLocOps, AdditionalDependencies,
+ DV->isIndirect(), DV->getDebugLoc(), DV->getOrder(), IsVariadic);
ClonedDVs.push_back(Clone);
DV->setIsInvalidated();
DV->setIsEmitted();
@@ -10607,6 +10917,41 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
N0.getNode()->dumprFull(this);
dbgs() << " into " << *DIExpr << '\n');
}
+ break;
+ }
+ case ISD::TRUNCATE: {
+ SDValue N0 = N.getOperand(0);
+ TypeSize FromSize = N0.getValueSizeInBits();
+ TypeSize ToSize = N.getValueSizeInBits(0);
+
+ DIExpression *DbgExpression = DV->getExpression();
+ auto ExtOps = DIExpression::getExtOps(FromSize, ToSize, false);
+ auto NewLocOps = DV->copyLocationOps();
+ bool Changed = false;
+ for (size_t i = 0; i < NewLocOps.size(); ++i) {
+ if (NewLocOps[i].getKind() != SDDbgOperand::SDNODE ||
+ NewLocOps[i].getSDNode() != &N)
+ continue;
+
+ NewLocOps[i] = SDDbgOperand::fromNode(N0.getNode(), N0.getResNo());
+ DbgExpression = DIExpression::appendOpsToArg(DbgExpression, ExtOps, i);
+ Changed = true;
+ }
+ assert(Changed && "Salvage target doesn't use N");
+ (void)Changed;
+
+ SDDbgValue *Clone =
+ getDbgValueList(DV->getVariable(), DbgExpression, NewLocOps,
+ DV->getAdditionalDependencies(), DV->isIndirect(),
+ DV->getDebugLoc(), DV->getOrder(), DV->isVariadic());
+
+ ClonedDVs.push_back(Clone);
+ DV->setIsInvalidated();
+ DV->setIsEmitted();
+ LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this);
+ dbgs() << " into " << *DbgExpression << '\n');
+ break;
+ }
}
}
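
Conceptually, the ADD salvage above rewrites the debug expression instead of
the value: a constant RHS folds in as an offset, while a non-constant RHS
forces the dbg.value to become variadic so the second operand can be named by
index. A rough standalone sketch of the resulting expressions (the
DW_OP_LLVM_arg value below is a placeholder, since it is an LLVM vendor
extension, unlike the standard DW_OP_plus and DW_OP_plus_uconst):

    #include <cstdint>
    #include <vector>

    enum : uint64_t {
      DW_OP_plus = 0x22,        // standard DWARF
      DW_OP_plus_uconst = 0x23, // standard DWARF
      DW_OP_LLVM_arg = 0x1001   // placeholder for the LLVM extension
    };

    int main() {
      // dbg.value(add x, 7) -> dbg.value(x), expression [plus_uconst 7].
      std::vector<uint64_t> ConstCase = {DW_OP_plus_uconst, 7};
      // dbg.value(add x, y) -> variadic dbg.value(x, y), expression
      // [arg 0, arg 1, plus]. Indirect locations cannot take this form,
      // which is why the code above skips them.
      std::vector<uint64_t> VariadicCase = {DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg,
                                            1, DW_OP_plus};
      (void)ConstCase;
      (void)VariadicCase;
      return 0;
    }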
@@ -12110,6 +12455,10 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
// FIXME: This does not work for vectors with elements less than 8 bits.
while (VecWidth > 8) {
+ // If we can't split in half, stop here.
+ if (VecWidth & 1)
+ break;
+
unsigned HalfSize = VecWidth / 2;
APInt HighValue = SplatValue.extractBits(HalfSize, HalfSize);
APInt LowValue = SplatValue.extractBits(HalfSize, 0);
@@ -12127,6 +12476,12 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
VecWidth = HalfSize;
}
+ // FIXME: The loop above only tries to split in halves. But if the input
+ // vector is, for example, <3 x i16>, it wouldn't be able to detect a
+ // SplatBitSize of 16. It is unclear whether that is a design flaw currently
+ // limiting optimizations. Presumably, back when this helper was created,
+ // vectors were normally power-of-2 sized.
+
SplatBitSize = VecWidth;
return true;
}
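
The new guard matters because the loop narrows a splat candidate only by exact
halving. A minimal standalone sketch of that process on a plain 32-bit value:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Splat = 0xABABABABu;
      unsigned VecWidth = 32;
      while (VecWidth > 8) {
        if (VecWidth & 1) // the new guard: odd widths cannot be halved
          break;
        unsigned Half = VecWidth / 2;
        uint32_t Mask = (1u << Half) - 1;
        uint32_t Lo = Splat & Mask;
        uint32_t Hi = (Splat >> Half) & Mask;
        if (Lo != Hi) // halves differ: the previous width was the answer
          break;
        Splat = Lo;
        VecWidth = Half;
      }
      assert(VecWidth == 8 && Splat == 0xAB); // SplatBitSize would be 8
      return 0;
    }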
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index a432d8e92bca..39a1e09e83c5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -130,7 +130,7 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
// If the bases are the same frame index but we couldn't find a
// constant offset (the indices are different), be conservative.
- if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
+ if (A->getIndex() != B->getIndex() && (!MFI.isFixedObjectIndex(A->getIndex()) ||
!MFI.isFixedObjectIndex(B->getIndex()))) {
IsAlias = false;
return true;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 9595da9d0d8a..4fd76d012a16 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -76,6 +76,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -989,15 +990,15 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
-void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
+void RegsForValue::AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching,
unsigned MatchingIdx, const SDLoc &dl,
SelectionDAG &DAG,
std::vector<SDValue> &Ops) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
+ InlineAsm::Flag Flag(Code, Regs.size());
if (HasMatching)
- Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
+ Flag.setMatchingOp(MatchingIdx);
else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) {
// Put the register class of the virtual registers in the flag word. That
// way, later passes can recompute register class constraints for inline
@@ -1006,13 +1007,13 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
// from the def.
const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
- Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
+ Flag.setRegClass(RC->getID());
}
SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
Ops.push_back(Res);
- if (Code == InlineAsm::Kind_Clobber) {
+ if (Code == InlineAsm::Kind::Clobber) {
// Clobbers should always have a 1:1 mapping with registers, and may
// reference registers that have illegal (e.g. vector) types. Hence, we
// shouldn't try to apply any sort of splitting logic to them.
@@ -1147,12 +1148,7 @@ SDValue SelectionDAGBuilder::getControlRoot() {
return updateRoot(PendingExports);
}
-void SelectionDAGBuilder::visit(const Instruction &I) {
- // Set up outgoing PHI node register values before emitting the terminator.
- if (I.isTerminator()) {
- HandlePHINodesInSuccessorBlocks(I.getParent());
- }
-
+void SelectionDAGBuilder::visitDbgInfo(const Instruction &I) {
// Add SDDbgValue nodes for any var locs here. Do so before updating
// SDNodeOrder, as this mapping is {Inst -> Locs BEFORE Inst}.
if (FunctionVarLocs const *FnVarLocs = DAG.getFunctionVarLocs()) {
@@ -1168,11 +1164,57 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
}
SmallVector<Value *> Values(It->Values.location_ops());
if (!handleDebugValue(Values, Var, It->Expr, It->DL, SDNodeOrder,
- It->Values.hasArgList()))
- addDanglingDebugInfo(It, SDNodeOrder);
+ It->Values.hasArgList())) {
+ SmallVector<Value *, 4> Vals;
+ for (Value *V : It->Values.location_ops())
+ Vals.push_back(V);
+ addDanglingDebugInfo(Vals,
+ FnVarLocs->getDILocalVariable(It->VariableID),
+ It->Expr, Vals.size() > 1, It->DL, SDNodeOrder);
+ }
}
}
+ // If there is any debug-info attached to this instruction, it is in the
+ // form of DPValue non-instruction debug-info records.
+ for (DPValue &DPV : I.getDbgValueRange()) {
+ DILocalVariable *Variable = DPV.getVariable();
+ DIExpression *Expression = DPV.getExpression();
+ dropDanglingDebugInfo(Variable, Expression);
+
+ // A DPValue with no locations is a kill location.
+ SmallVector<Value *, 4> Values(DPV.location_ops());
+ if (Values.empty()) {
+ handleKillDebugValue(Variable, Expression, DPV.getDebugLoc(),
+ SDNodeOrder);
+ continue;
+ }
+
+ // A DPValue with an undef or absent location is also a kill location.
+ if (llvm::any_of(Values,
+ [](Value *V) { return !V || isa<UndefValue>(V); })) {
+ handleKillDebugValue(Variable, Expression, DPV.getDebugLoc(),
+ SDNodeOrder);
+ continue;
+ }
+
+ bool IsVariadic = DPV.hasArgList();
+ if (!handleDebugValue(Values, Variable, Expression, DPV.getDebugLoc(),
+ SDNodeOrder, IsVariadic)) {
+ addDanglingDebugInfo(Values, Variable, Expression, IsVariadic,
+ DPV.getDebugLoc(), SDNodeOrder);
+ }
+ }
+}
+
+void SelectionDAGBuilder::visit(const Instruction &I) {
+ visitDbgInfo(I);
+
+ // Set up outgoing PHI node register values before emitting the terminator.
+ if (I.isTerminator()) {
+ HandlePHINodesInSuccessorBlocks(I.getParent());
+ }
+
// Increase the SDNodeOrder if dealing with a non-debug instruction.
if (!isa<DbgInfoIntrinsic>(I))
++SDNodeOrder;
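
A minimal stand-in (hypothetical types, not the LLVM API) for the two
kill-location checks above: a record with no location operands, or with any
absent or undef operand, is handled as a kill rather than becoming a dangling
entry:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct Loc {
      bool IsUndef = false;
    };

    // An empty operand list, or any absent/undef operand, kills the location.
    bool isKillLocation(const std::vector<const Loc *> &Ops) {
      return Ops.empty() ||
             std::any_of(Ops.begin(), Ops.end(),
                         [](const Loc *L) { return !L || L->IsUndef; });
    }

    int main() {
      Loc Live, Undef{true};
      assert(isKillLocation({}));               // no locations
      assert(isKillLocation({&Live, nullptr})); // absent operand
      assert(isKillLocation({&Undef}));         // undef operand
      assert(!isKillLocation({&Live}));         // still a live location
      return 0;
    }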
@@ -1231,14 +1273,12 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
static bool handleDanglingVariadicDebugInfo(SelectionDAG &DAG,
DILocalVariable *Variable,
DebugLoc DL, unsigned Order,
- RawLocationWrapper Values,
+ SmallVectorImpl<Value *> &Values,
DIExpression *Expression) {
- if (!Values.hasArgList())
- return false;
// For variadic dbg_values we will now insert an undef.
// FIXME: We can potentially recover these!
SmallVector<SDDbgOperand, 2> Locs;
- for (const Value *V : Values.location_ops()) {
+ for (const Value *V : Values) {
auto *Undef = UndefValue::get(V->getType());
Locs.push_back(SDDbgOperand::fromConst(Undef));
}
@@ -1249,44 +1289,31 @@ static bool handleDanglingVariadicDebugInfo(SelectionDAG &DAG,
return true;
}
-void SelectionDAGBuilder::addDanglingDebugInfo(const VarLocInfo *VarLoc,
- unsigned Order) {
- if (!handleDanglingVariadicDebugInfo(
- DAG,
- const_cast<DILocalVariable *>(DAG.getFunctionVarLocs()
- ->getVariable(VarLoc->VariableID)
- .getVariable()),
- VarLoc->DL, Order, VarLoc->Values, VarLoc->Expr)) {
- DanglingDebugInfoMap[VarLoc->Values.getVariableLocationOp(0)].emplace_back(
- VarLoc, Order);
- }
-}
-
-void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI,
+void SelectionDAGBuilder::addDanglingDebugInfo(SmallVectorImpl<Value *> &Values,
+ DILocalVariable *Var,
+ DIExpression *Expr,
+ bool IsVariadic, DebugLoc DL,
unsigned Order) {
- // We treat variadic dbg_values differently at this stage.
- if (!handleDanglingVariadicDebugInfo(
- DAG, DI->getVariable(), DI->getDebugLoc(), Order,
- DI->getWrappedLocation(), DI->getExpression())) {
- // TODO: Dangling debug info will eventually either be resolved or produce
- // an Undef DBG_VALUE. However in the resolution case, a gap may appear
- // between the original dbg.value location and its resolved DBG_VALUE,
- // which we should ideally fill with an extra Undef DBG_VALUE.
- assert(DI->getNumVariableLocationOps() == 1 &&
- "DbgValueInst without an ArgList should have a single location "
- "operand.");
- DanglingDebugInfoMap[DI->getValue(0)].emplace_back(DI, Order);
+ if (IsVariadic) {
+ handleDanglingVariadicDebugInfo(DAG, Var, DL, Order, Values, Expr);
+ return;
}
+ // TODO: Dangling debug info will eventually either be resolved or produce
+ // an Undef DBG_VALUE. However in the resolution case, a gap may appear
+ // between the original dbg.value location and its resolved DBG_VALUE,
+ // which we should ideally fill with an extra Undef DBG_VALUE.
+ assert(Values.size() == 1);
+ DanglingDebugInfoMap[Values[0]].emplace_back(Var, Expr, DL, Order);
}
void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
const DIExpression *Expr) {
auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) {
- DIVariable *DanglingVariable = DDI.getVariable(DAG.getFunctionVarLocs());
+ DIVariable *DanglingVariable = DDI.getVariable();
DIExpression *DanglingExpr = DDI.getExpression();
if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) {
- LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << printDDI(DDI)
- << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping dangling debug info for "
+ << printDDI(nullptr, DDI) << "\n");
return true;
}
return false;
@@ -1299,7 +1326,7 @@ void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
// whether it can be salvaged.
for (auto &DDI : DDIV)
if (isMatchingDbgValue(DDI))
- salvageUnresolvedDbgValue(DDI);
+ salvageUnresolvedDbgValue(DDIMI.first, DDI);
erase_if(DDIV, isMatchingDbgValue);
}
@@ -1318,7 +1345,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
DebugLoc DL = DDI.getDebugLoc();
unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
- DILocalVariable *Variable = DDI.getVariable(DAG.getFunctionVarLocs());
+ DILocalVariable *Variable = DDI.getVariable();
DIExpression *Expr = DDI.getExpression();
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
@@ -1332,8 +1359,8 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
// calling EmitFuncArgumentDbgValue here.
if (!EmitFuncArgumentDbgValue(V, Variable, Expr, DL,
FuncArgumentDbgValueKind::Value, Val)) {
- LLVM_DEBUG(dbgs() << "Resolve dangling debug info for " << printDDI(DDI)
- << "\n");
+ LLVM_DEBUG(dbgs() << "Resolve dangling debug info for "
+ << printDDI(V, DDI) << "\n");
LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump());
// Increase the SDNodeOrder for the DbgValue here to make sure it is
// inserted after the definition of Val when emitting the instructions
@@ -1347,9 +1374,11 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
DAG.AddDbgValue(SDV, false);
} else
LLVM_DEBUG(dbgs() << "Resolved dangling debug info for "
- << printDDI(DDI) << " in EmitFuncArgumentDbgValue\n");
+ << printDDI(V, DDI)
+ << " in EmitFuncArgumentDbgValue\n");
} else {
- LLVM_DEBUG(dbgs() << "Dropping debug info for " << printDDI(DDI) << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << printDDI(V, DDI)
+ << "\n");
auto Undef = UndefValue::get(V->getType());
auto SDV =
DAG.getConstantDbgValue(Variable, Expr, Undef, DL, DbgSDNodeOrder);
@@ -1359,14 +1388,14 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
DDIV.clear();
}
-void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
+void SelectionDAGBuilder::salvageUnresolvedDbgValue(const Value *V,
+ DanglingDebugInfo &DDI) {
// TODO: For the variadic implementation, instead of only checking the fail
// state of `handleDebugValue`, we need know specifically which values were
// invalid, so that we attempt to salvage only those values when processing
// a DIArgList.
- Value *V = DDI.getVariableLocationOp(0);
- Value *OrigV = V;
- DILocalVariable *Var = DDI.getVariable(DAG.getFunctionVarLocs());
+ const Value *OrigV = V;
+ DILocalVariable *Var = DDI.getVariable();
DIExpression *Expr = DDI.getExpression();
DebugLoc DL = DDI.getDebugLoc();
unsigned SDOrder = DDI.getSDNodeOrder();
@@ -1383,11 +1412,12 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
// a non-instruction is seen, such as a constant expression or global
// variable. FIXME: Further work could recover those too.
while (isa<Instruction>(V)) {
- Instruction &VAsInst = *cast<Instruction>(V);
+ const Instruction &VAsInst = *cast<const Instruction>(V);
// Temporary "0", awaiting real implementation.
SmallVector<uint64_t, 16> Ops;
SmallVector<Value *, 4> AdditionalValues;
- V = salvageDebugInfoImpl(VAsInst, Expr->getNumLocationOperands(), Ops,
+ V = salvageDebugInfoImpl(const_cast<Instruction &>(VAsInst),
+ Expr->getNumLocationOperands(), Ops,
AdditionalValues);
// If we cannot salvage any further, and haven't yet found a suitable debug
// expression, bail out.
@@ -1420,8 +1450,8 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
auto *Undef = UndefValue::get(OrigV->getType());
auto *SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
DAG.AddDbgValue(SDV, false);
- LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << printDDI(DDI)
- << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n "
+ << printDDI(OrigV, DDI) << "\n");
}
void SelectionDAGBuilder::handleKillDebugValue(DILocalVariable *Var,
@@ -1571,7 +1601,7 @@ void SelectionDAGBuilder::resolveOrClearDbgInfo() {
// Try to fixup any remaining dangling debug info -- and drop it if we can't.
for (auto &Pair : DanglingDebugInfoMap)
for (auto &DDI : Pair.second)
- salvageUnresolvedDbgValue(DDI);
+ salvageUnresolvedDbgValue(const_cast<Value *>(Pair.first), DDI);
clearDanglingDebugInfo();
}
@@ -1738,6 +1768,12 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (const auto *NC = dyn_cast<NoCFIValue>(C))
return getValue(NC->getGlobalValue());
+ if (VT == MVT::aarch64svcount) {
+ assert(C->isNullValue() && "Can only zero this target type!");
+ return DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT,
+ DAG.getConstant(0, getCurSDLoc(), MVT::nxv16i1));
+ }
+
VectorType *VecTy = cast<VectorType>(V->getType());
// Now that we know the number and type of the elements, get that number of
@@ -1822,7 +1858,7 @@ void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
// If this is not a fall-through branch or optimizations are switched off,
// emit the branch.
if (TargetMBB != NextBlock(FuncInfo.MBB) ||
- TM.getOptLevel() == CodeGenOpt::None)
+ TM.getOptLevel() == CodeGenOptLevel::None)
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
getControlRoot(), DAG.getBasicBlock(TargetMBB)));
return;
@@ -2049,7 +2085,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr,
- TypeSize::Fixed(Offsets[i]));
+ TypeSize::getFixed(Offsets[i]));
SDValue Val = RetOp.getValue(RetOp.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
@@ -2478,7 +2514,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// If this is not a fall-through branch or optimizations are switched off,
// emit the branch.
- if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None) {
+ if (Succ0MBB != NextBlock(BrMBB) ||
+ TM.getOptLevel() == CodeGenOptLevel::None) {
auto Br = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
getControlRoot(), DAG.getBasicBlock(Succ0MBB));
setValue(&I, Br);
@@ -2662,14 +2699,13 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
/// visitJumpTable - Emit JumpTable node in the current MBB
void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
// Emit the code for the jump table
+ assert(JT.SL && "Should set SDLoc for SelectionDAG!");
assert(JT.Reg != -1U && "Should lower JT Header first!");
EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
- SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
- JT.Reg, PTy);
+ SDValue Index = DAG.getCopyFromReg(getControlRoot(), *JT.SL, JT.Reg, PTy);
SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
- SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
- MVT::Other, Index.getValue(1),
- Table, Index);
+ SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, *JT.SL, MVT::Other,
+ Index.getValue(1), Table, Index);
DAG.setRoot(BrJumpTable);
}
@@ -2678,7 +2714,8 @@ void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
JumpTableHeader &JTH,
MachineBasicBlock *SwitchBB) {
- SDLoc dl = getCurSDLoc();
+ assert(JT.SL && "Should set SDLoc for SelectionDAG!");
+ const SDLoc &dl = *JT.SL;
// Subtract the lowest switch case value from the value being switched on.
SDValue SwitchOp = getValue(JTH.SValue);
@@ -2775,7 +2812,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
const Module &M = *ParentBB->getParent()->getFunction().getParent();
Align Align =
- DAG.getDataLayout().getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));
+ DAG.getDataLayout().getPrefTypeAlign(PointerType::get(M.getContext(), 0));
// Generate code to load the content of the guard slot.
SDValue GuardVal = DAG.getLoad(
@@ -3225,14 +3262,9 @@ void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
// We may be able to ignore unreachable behind a noreturn call.
if (DAG.getTarget().Options.NoTrapAfterNoreturn) {
- const BasicBlock &BB = *I.getParent();
- if (&I != &BB.front()) {
- BasicBlock::const_iterator PredI =
- std::prev(BasicBlock::const_iterator(&I));
- if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
- if (Call->doesNotReturn())
- return;
- }
+ if (const CallInst *Call = dyn_cast_or_null<CallInst>(I.getPrevNode())) {
+ if (Call->doesNotReturn())
+ return;
}
}
@@ -3466,7 +3498,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
}
if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
- (TLI.isOperationLegalOrCustom(Opc, VT) ||
+ (TLI.isOperationLegalOrCustomOrPromote(Opc, VT) ||
(UseScalarMinMax &&
TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
// If the underlying comparison instruction is used by any other
@@ -3522,9 +3554,23 @@ void SelectionDAGBuilder::visitZExt(const User &I) {
// ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
// ZExt also can't be a cast to bool for the same reason. So, nothing much to do.
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
- I.getType());
- setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
+ auto &TLI = DAG.getTargetLoweringInfo();
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ SDNodeFlags Flags;
+ if (auto *PNI = dyn_cast<PossiblyNonNegInst>(&I))
+ Flags.setNonNeg(PNI->hasNonNeg());
+
+ // Eagerly use nonneg information to canonicalize towards sign_extend if
+ // that is the target's preference.
+ // TODO: Let the target do this later.
+ if (Flags.hasNonNeg() &&
+ TLI.isSExtCheaperThanZExt(N.getValueType(), DestVT)) {
+ setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
+ return;
+ }
+
+ setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N, Flags));
}
void SelectionDAGBuilder::visitSExt(const User &I) {
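
The fast path above rests on a simple bit-level fact: when the source value is
known non-negative (which is what the nneg flag asserts), sign- and
zero-extension produce identical bits, so the builder may emit whichever form
the target reports as cheaper. A minimal check:

    #include <cassert>
    #include <cstdint>

    int main() {
      int8_t X = 42;                        // non-negative, as nneg asserts
      uint32_t SExt = uint32_t(int32_t(X)); // sign-extend, then reinterpret
      uint32_t ZExt = uint32_t(uint8_t(X)); // zero-extend
      assert(SExt == ZExt);
      return 0;
    }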
@@ -4111,7 +4157,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
SDValue AllocSize = getValue(I.getArraySize());
- EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), I.getAddressSpace());
+ EVT IntPtr = TLI.getPointerTy(DL, I.getAddressSpace());
if (AllocSize.getValueType() != IntPtr)
AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
@@ -4120,10 +4166,12 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
DAG.getVScale(dl, IntPtr,
APInt(IntPtr.getScalarSizeInBits(),
TySize.getKnownMinValue())));
- else
- AllocSize =
- DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize,
- DAG.getConstant(TySize.getFixedValue(), dl, IntPtr));
+ else {
+ SDValue TySizeValue =
+ DAG.getConstant(TySize.getFixedValue(), dl, MVT::getIntegerVT(64));
+ AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize,
+ DAG.getZExtOrTrunc(TySizeValue, dl, IntPtr));
+ }
// Handle alignment. If the requested alignment is less than or equal to
// the stack alignment, ignore it. If the size is greater than or equal to
@@ -4156,6 +4204,18 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
}
+static const MDNode *getRangeMetadata(const Instruction &I) {
+ // If !noundef is not present, then !range violation results in a poison
+ // value rather than immediate undefined behavior. In theory, transferring
+ // these annotations to SDAG is fine, but in practice there are key SDAG
+ // transforms that are known not to be poison-safe, such as folding logical
+ // and/or to bitwise and/or. For now, only transfer !range if !noundef is
+ // also present.
+ if (!I.hasMetadata(LLVMContext::MD_noundef))
+ return nullptr;
+ return I.getMetadata(LLVMContext::MD_range);
+}
+
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (I.isAtomic())
return visitAtomicLoad(I);
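
A minimal stand-in (hypothetical types, not the LLVM API) for the helper
above, showing the contract it encodes: range information is forwarded only
when noundef also holds, because a plain !range violation merely yields
poison:

    #include <cassert>
    #include <cstdint>
    #include <optional>
    #include <utility>

    struct InstrInfo {
      bool HasNoundef = false;
      std::optional<std::pair<int64_t, int64_t>> Range; // from !range, if any
    };

    // Only propagate the range when !noundef rules out poison; some SDAG
    // folds (e.g. logical and/or to bitwise and/or) are not poison-safe.
    std::optional<std::pair<int64_t, int64_t>> getRange(const InstrInfo &I) {
      if (!I.HasNoundef)
        return std::nullopt;
      return I.Range;
    }

    int main() {
      InstrInfo I;
      I.Range = std::pair<int64_t, int64_t>(0, 16);
      assert(!getRange(I)); // !range without !noundef is dropped
      I.HasNoundef = true;
      assert(getRange(I));  // both present: forwarded
      return 0;
    }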
@@ -4180,7 +4240,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Type *Ty = I.getType();
SmallVector<EVT, 4> ValueVTs, MemVTs;
- SmallVector<uint64_t, 4> Offsets;
+ SmallVector<TypeSize, 4> Offsets;
ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets, 0);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
@@ -4188,7 +4248,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Align Alignment = I.getAlign();
AAMDNodes AAInfo = I.getAAMetadata();
- const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
+ const MDNode *Ranges = getRangeMetadata(I);
bool isVolatile = I.isVolatile();
MachineMemOperand::Flags MMOFlags =
TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo);
@@ -4219,14 +4279,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (isVolatile)
Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
- // An aggregate load cannot wrap around the address space, so offsets to its
- // parts don't wrap either.
- SDNodeFlags Flags;
- Flags.setNoUnsignedWrap(true);
-
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
- EVT PtrVT = Ptr.getValueType();
unsigned ChainI = 0;
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
@@ -4243,13 +4297,15 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Root = Chain;
ChainI = 0;
}
- SDValue A = DAG.getNode(ISD::ADD, dl,
- PtrVT, Ptr,
- DAG.getConstant(Offsets[i], dl, PtrVT),
- Flags);
- SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A,
- MachinePointerInfo(SV, Offsets[i]), Alignment,
+ // TODO: MachinePointerInfo only supports a fixed length offset.
+ MachinePointerInfo PtrInfo =
+ !Offsets[i].isScalable() || Offsets[i].isZero()
+ ? MachinePointerInfo(SV, Offsets[i].getKnownMinValue())
+ : MachinePointerInfo();
+
+ SDValue A = DAG.getObjectPtrOffset(dl, Ptr, Offsets[i]);
+ SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A, PtrInfo, Alignment,
MMOFlags, AAInfo, Ranges);
Chains[ChainI] = L.getValue(1);
@@ -4351,7 +4407,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
}
SmallVector<EVT, 4> ValueVTs, MemVTs;
- SmallVector<uint64_t, 4> Offsets;
+ SmallVector<TypeSize, 4> Offsets;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
SrcV->getType(), ValueVTs, &MemVTs, &Offsets, 0);
unsigned NumValues = ValueVTs.size();
@@ -4372,11 +4428,6 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
- // An aggregate load cannot wrap around the address space, so offsets to its
- // parts don't wrap either.
- SDNodeFlags Flags;
- Flags.setNoUnsignedWrap(true);
-
unsigned ChainI = 0;
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
// See visitLoad comments.
@@ -4386,14 +4437,19 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
Root = Chain;
ChainI = 0;
}
- SDValue Add =
- DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(Offsets[i]), dl, Flags);
+
+ // TODO: MachinePointerInfo only supports a fixed length offset.
+ MachinePointerInfo PtrInfo =
+ !Offsets[i].isScalable() || Offsets[i].isZero()
+ ? MachinePointerInfo(PtrV, Offsets[i].getKnownMinValue())
+ : MachinePointerInfo();
+
+ SDValue Add = DAG.getObjectPtrOffset(dl, Ptr, Offsets[i]);
SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]);
SDValue St =
- DAG.getStore(Root, dl, Val, Add, MachinePointerInfo(PtrV, Offsets[i]),
- Alignment, MMOFlags, AAInfo);
+ DAG.getStore(Root, dl, Val, Add, PtrInfo, Alignment, MMOFlags, AAInfo);
Chains[ChainI] = St;
}
@@ -4607,7 +4663,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
Alignment = DAG.getEVTAlign(VT);
AAMDNodes AAInfo = I.getAAMetadata();
- const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
+ const MDNode *Ranges = getRangeMetadata(I);
// Do not serialize masked loads of constant memory with anything.
MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
@@ -4641,7 +4697,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
->getMaybeAlignValue()
.value_or(DAG.getEVTAlign(VT.getScalarType()));
- const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
+ const MDNode *Ranges = getRangeMetadata(I);
SDValue Root = DAG.getRoot();
SDValue Base;
@@ -4801,23 +4857,6 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
SDValue Ptr = getValue(I.getPointerOperand());
-
- if (TLI.lowerAtomicLoadAsLoadSDNode(I)) {
- // TODO: Once this is better exercised by tests, it should be merged with
- // the normal path for loads to prevent future divergence.
- SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO);
- if (MemVT != VT)
- L = DAG.getPtrExtOrTrunc(L, dl, VT);
-
- setValue(&I, L);
- SDValue OutChain = L.getValue(1);
- if (!I.isUnordered())
- DAG.setRoot(OutChain);
- else
- PendingLoads.push_back(OutChain);
- return;
- }
-
SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
Ptr, MMO);
@@ -4857,16 +4896,8 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
SDValue Ptr = getValue(I.getPointerOperand());
- if (TLI.lowerAtomicStoreAsStoreSDNode(I)) {
- // TODO: Once this is better exercised by tests, it should be merged with
- // the normal path for stores to prevent future divergence.
- SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO);
- setValue(&I, S);
- DAG.setRoot(S);
- return;
- }
- SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
- Ptr, Val, MMO);
+ SDValue OutChain =
+ DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain, Val, Ptr, MMO);
setValue(&I, OutChain);
DAG.setRoot(OutChain);
@@ -5821,26 +5852,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (!Op)
return false;
- // If the expression refers to the entry value of an Argument, use the
- // corresponding livein physical register. As per the Verifier, this is only
- // allowed for swiftasync Arguments.
- if (Op->isReg() && Expr->isEntryValue()) {
- assert(Arg->hasAttribute(Attribute::AttrKind::SwiftAsync));
- auto OpReg = Op->getReg();
- for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins())
- if (OpReg == VirtReg || OpReg == PhysReg) {
- SDDbgValue *SDV = DAG.getVRegDbgValue(
- Variable, Expr, PhysReg,
- Kind != FuncArgumentDbgValueKind::Value /*is indirect*/, DL,
- SDNodeOrder);
- DAG.AddDbgValue(SDV, false /*treat as dbg.declare byval parameter*/);
- return true;
- }
- LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but "
- "couldn't find a physical register\n");
- return true;
- }
-
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
MachineInstr *NewMI = nullptr;
@@ -5929,6 +5940,41 @@ static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) {
llvm_unreachable("expected corresponding call to preallocated setup/arg");
}
+/// If DI is a debug value with an EntryValue expression, lower it using the
+/// corresponding physical register of the associated Argument value
+/// (guaranteed to exist by the verifier).
+bool SelectionDAGBuilder::visitEntryValueDbgValue(const DbgValueInst &DI) {
+ DILocalVariable *Variable = DI.getVariable();
+ DIExpression *Expr = DI.getExpression();
+ if (!Expr->isEntryValue() || !hasSingleElement(DI.getValues()))
+ return false;
+
+ // These properties are guaranteed by the verifier.
+ Argument *Arg = cast<Argument>(DI.getValue(0));
+ assert(Arg->hasAttribute(Attribute::AttrKind::SwiftAsync));
+
+ auto ArgIt = FuncInfo.ValueMap.find(Arg);
+ if (ArgIt == FuncInfo.ValueMap.end()) {
+ LLVM_DEBUG(
+ dbgs() << "Dropping dbg.value: expression is entry_value but "
+ "couldn't find an associated register for the Argument\n");
+ return true;
+ }
+ Register ArgVReg = ArgIt->getSecond();
+
+ for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins())
+ if (ArgVReg == VirtReg || ArgVReg == PhysReg) {
+ SDDbgValue *SDV =
+ DAG.getVRegDbgValue(Variable, Expr, PhysReg, false /*IsIndirect*/,
+ DI.getDebugLoc(), SDNodeOrder);
+ DAG.AddDbgValue(SDV, false /*treat as dbg.declare byval parameter*/);
+ return true;
+ }
+ LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but "
+ "couldn't find a physical register\n");
+ return true;
+}
+
/// Lower the call to the specified intrinsic function.
void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned Intrinsic) {
@@ -6258,6 +6304,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DIExpression *Expression = DI.getExpression();
dropDanglingDebugInfo(Variable, Expression);
+ if (visitEntryValueDbgValue(DI))
+ return;
+
if (DI.isKillLocation()) {
handleKillDebugValue(Variable, Expression, DI.getDebugLoc(), SDNodeOrder);
return;
@@ -6270,7 +6319,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool IsVariadic = DI.hasArgList();
if (!handleDebugValue(Values, Variable, Expression, DI.getDebugLoc(),
SDNodeOrder, IsVariadic))
- addDanglingDebugInfo(&DI, SDNodeOrder);
+ addDanglingDebugInfo(Values, Variable, Expression, IsVariadic,
+ DI.getDebugLoc(), SDNodeOrder);
return;
}
@@ -6383,6 +6433,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::fabs:
case Intrinsic::sin:
case Intrinsic::cos:
+ case Intrinsic::exp10:
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::trunc:
@@ -6398,6 +6449,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::fabs: Opcode = ISD::FABS; break;
case Intrinsic::sin: Opcode = ISD::FSIN; break;
case Intrinsic::cos: Opcode = ISD::FCOS; break;
+ case Intrinsic::exp10: Opcode = ISD::FEXP10; break;
case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
@@ -6657,6 +6709,25 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::reset_fpenv:
DAG.setRoot(DAG.getNode(ISD::RESET_FPENV, sdl, MVT::Other, getRoot()));
return;
+ case Intrinsic::get_fpmode:
+ Res = DAG.getNode(
+ ISD::GET_FPMODE, sdl,
+ DAG.getVTList(TLI.getValueType(DAG.getDataLayout(), I.getType()),
+ MVT::Other),
+ DAG.getRoot());
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return;
+ case Intrinsic::set_fpmode:
+ Res = DAG.getNode(ISD::SET_FPMODE, sdl, MVT::Other, {DAG.getRoot()},
+ getValue(I.getArgOperand(0)));
+ DAG.setRoot(Res);
+ return;
+ case Intrinsic::reset_fpmode: {
+ Res = DAG.getNode(ISD::RESET_FPMODE, sdl, MVT::Other, getRoot());
+ DAG.setRoot(Res);
+ return;
+ }
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
@@ -7041,15 +7112,18 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
auto Flags = rw == 0 ? MachineMemOperand::MOLoad :MachineMemOperand::MOStore;
Ops[0] = DAG.getRoot();
Ops[1] = getValue(I.getArgOperand(0));
- Ops[2] = getValue(I.getArgOperand(1));
- Ops[3] = getValue(I.getArgOperand(2));
- Ops[4] = getValue(I.getArgOperand(3));
+ Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl,
+ MVT::i32);
+ Ops[3] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(2)), sdl,
+ MVT::i32);
+ Ops[4] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(3)), sdl,
+ MVT::i32);
SDValue Result = DAG.getMemIntrinsicNode(
ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops,
EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)),
/* align */ std::nullopt, Flags);
- // Chain the prefetch in parallell with any pending loads, to stay out of
+ // Chain the prefetch in parallel with any pending loads, to stay out of
// the way of later optimizations.
PendingLoads.push_back(Result);
Result = getRoot();
@@ -7060,7 +7134,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::lifetime_end: {
bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
// Stack coloring is not enabled in O0, discard region information.
- if (TM.getOptLevel() == CodeGenOpt::None)
+ if (TM.getOptLevel() == CodeGenOptLevel::None)
return;
const int64_t ObjectSize =
@@ -7145,6 +7219,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
llvm_unreachable("instrprof failed to lower a timestamp");
case Intrinsic::instrprof_value_profile:
llvm_unreachable("instrprof failed to lower a value profiling call");
+ case Intrinsic::instrprof_mcdc_parameters:
+ llvm_unreachable("instrprof failed to lower mcdc parameters");
+ case Intrinsic::instrprof_mcdc_tvbitmap_update:
+ llvm_unreachable("instrprof failed to lower an mcdc tvbitmap update");
+ case Intrinsic::instrprof_mcdc_condbitmap_update:
+ llvm_unreachable("instrprof failed to lower an mcdc condbitmap update");
case Intrinsic::localescape: {
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
@@ -7372,13 +7452,62 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, Val);
return;
}
+ case Intrinsic::amdgcn_cs_chain: {
+ assert(I.arg_size() == 5 && "Additional args not supported yet");
+ assert(cast<ConstantInt>(I.getOperand(4))->isZero() &&
+ "Non-zero flags not supported yet");
+
+ // At this point we don't care if it's amdgpu_cs_chain or
+ // amdgpu_cs_chain_preserve.
+ CallingConv::ID CC = CallingConv::AMDGPU_CS_Chain;
+
+ Type *RetTy = I.getType();
+ assert(RetTy->isVoidTy() && "Should not return");
+
+ SDValue Callee = getValue(I.getOperand(0));
+
+ // We only have 2 actual args: one for the SGPRs and one for the VGPRs.
+ // We'll also tack on the value of the EXEC mask at the end.
+ TargetLowering::ArgListTy Args;
+ Args.reserve(3);
+
+ for (unsigned Idx : {2, 3, 1}) {
+ TargetLowering::ArgListEntry Arg;
+ Arg.Node = getValue(I.getOperand(Idx));
+ Arg.Ty = I.getOperand(Idx)->getType();
+ Arg.setAttributes(&I, Idx);
+ Args.push_back(Arg);
+ }
+
+ assert(Args[0].IsInReg && "SGPR args should be marked inreg");
+ assert(!Args[1].IsInReg && "VGPR args should not be marked inreg");
+ Args[2].IsInReg = true; // EXEC should be inreg
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(getCurSDLoc())
+ .setChain(getRoot())
+ .setCallee(CC, RetTy, Callee, std::move(Args))
+ .setNoReturn(true)
+ .setTailCall(true)
+ .setConvergent(I.isConvergent());
+ CLI.CB = &I;
+ std::pair<SDValue, SDValue> Result =
+ lowerInvokable(CLI, /*EHPadBB*/ nullptr);
+ (void)Result;
+ assert(!Result.first.getNode() && !Result.second.getNode() &&
+ "Should've lowered as tail call");
+
+ HasTailCall = true;
+ return;
+ }
case Intrinsic::ptrmask: {
SDValue Ptr = getValue(I.getOperand(0));
- SDValue Const = getValue(I.getOperand(1));
+ SDValue Mask = getValue(I.getOperand(1));
EVT PtrVT = Ptr.getValueType();
- setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr,
- DAG.getZExtOrTrunc(Const, sdl, PtrVT)));
+ assert(PtrVT == Mask.getValueType() &&
+ "Pointers with different index type are not supported by SDAG");
+ setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr, Mask));
return;
}
case Intrinsic::threadlocal_address: {
@@ -7396,7 +7525,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
SDValue TripCount = getValue(I.getOperand(1));
- auto VecTy = CCVT.changeVectorElementType(ElementVT);
+ EVT VecTy = EVT::getVectorVT(*DAG.getContext(), ElementVT,
+ CCVT.getVectorElementCount());
SDValue VectorIndex = DAG.getSplat(VecTy, sdl, Index);
SDValue VectorTripCount = DAG.getSplat(VecTy, sdl, TripCount);
@@ -7442,6 +7572,62 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, Trunc);
return;
}
+ case Intrinsic::experimental_cttz_elts: {
+ auto DL = getCurSDLoc();
+ SDValue Op = getValue(I.getOperand(0));
+ EVT OpVT = Op.getValueType();
+
+ if (!TLI.shouldExpandCttzElements(OpVT)) {
+ visitTargetIntrinsic(I, Intrinsic);
+ return;
+ }
+
+ if (OpVT.getScalarType() != MVT::i1) {
+ // Compare the input vector elements to zero, and use the result to count trailing zeros
+ SDValue AllZero = DAG.getConstant(0, DL, OpVT);
+ OpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ OpVT.getVectorElementCount());
+ Op = DAG.getSetCC(DL, OpVT, Op, AllZero, ISD::SETNE);
+ }
+
+ // Find the smallest "sensible" element type to use for the expansion.
+ ConstantRange CR(
+ APInt(64, OpVT.getVectorElementCount().getKnownMinValue()));
+ if (OpVT.isScalableVT())
+ CR = CR.umul_sat(getVScaleRange(I.getCaller(), 64));
+
+ // If the zero-is-poison flag is set, we can assume the upper limit
+ // of the result is VF-1.
+ if (!cast<ConstantSDNode>(getValue(I.getOperand(1)))->isZero())
+ CR = CR.subtract(APInt(64, 1));
+
+ unsigned EltWidth = I.getType()->getScalarSizeInBits();
+ EltWidth = std::min(EltWidth, (unsigned)CR.getActiveBits());
+ EltWidth = std::max(llvm::bit_ceil(EltWidth), (unsigned)8);
+
+ MVT NewEltTy = MVT::getIntegerVT(EltWidth);
+
+ // Create the new vector type & get the vector length
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltTy,
+ OpVT.getVectorElementCount());
+
+ SDValue VL =
+ DAG.getElementCount(DL, NewEltTy, OpVT.getVectorElementCount());
+
+ SDValue StepVec = DAG.getStepVector(DL, NewVT);
+ SDValue SplatVL = DAG.getSplat(NewVT, DL, VL);
+ SDValue StepVL = DAG.getNode(ISD::SUB, DL, NewVT, SplatVL, StepVec);
+ SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, Op);
+ SDValue And = DAG.getNode(ISD::AND, DL, NewVT, StepVL, Ext);
+ SDValue Max = DAG.getNode(ISD::VECREDUCE_UMAX, DL, NewEltTy, And);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, NewEltTy, VL, Max);
+
+ EVT RetTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ SDValue Ret = DAG.getZExtOrTrunc(Sub, DL, RetTy);
+
+ setValue(&I, Ret);
+ return;
+ }
case Intrinsic::vector_insert: {
SDValue Vec = getValue(I.getOperand(0));
SDValue SubVec = getValue(I.getOperand(1));
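
The expansion above computes the count without a scalar loop: weight lane i of
the mask with VL - i, take the unsigned-max reduction, and subtract from VL.
The first set lane wins the max, and an all-false mask naturally yields VL. A
scalar sketch of the same arithmetic:

    #include <algorithm>
    #include <array>
    #include <cassert>

    int main() {
      const std::array<bool, 4> Mask = {false, false, true, false};
      const unsigned VL = Mask.size();
      unsigned Max = 0; // VECREDUCE_UMAX over (StepVL & sext(Mask))
      for (unsigned I = 0; I < VL; ++I)
        Max = std::max(Max, Mask[I] ? VL - I : 0u);
      assert(VL - Max == 2); // the first true element is at index 2
      return 0;
    }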
@@ -7645,7 +7831,7 @@ void SelectionDAGBuilder::visitVPLoad(
Value *PtrOperand = VPIntrin.getArgOperand(0);
MaybeAlign Alignment = VPIntrin.getPointerAlignment();
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
- const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
+ const MDNode *Ranges = getRangeMetadata(VPIntrin);
SDValue LD;
// Do not serialize variable-length loads of constant memory with
// anything.
@@ -7672,7 +7858,7 @@ void SelectionDAGBuilder::visitVPGather(
Value *PtrOperand = VPIntrin.getArgOperand(0);
MaybeAlign Alignment = VPIntrin.getPointerAlignment();
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
- const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
+ const MDNode *Ranges = getRangeMetadata(VPIntrin);
SDValue LD;
if (!Alignment)
Alignment = DAG.getEVTAlign(VT.getScalarType());
@@ -7779,7 +7965,7 @@ void SelectionDAGBuilder::visitVPStridedLoad(
if (!Alignment)
Alignment = DAG.getEVTAlign(VT.getScalarType());
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
- const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
+ const MDNode *Ranges = getRangeMetadata(VPIntrin);
MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
@@ -7929,6 +8115,16 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
}
break;
}
+ case ISD::VP_IS_FPCLASS: {
+ const DataLayout DLayout = DAG.getDataLayout();
+ EVT DestVT = TLI.getValueType(DLayout, VPIntrin.getType());
+ auto Constant = cast<ConstantSDNode>(OpValues[1])->getZExtValue();
+ SDValue Check = DAG.getTargetConstant(Constant, DL, MVT::i32);
+ SDValue V = DAG.getNode(ISD::VP_IS_FPCLASS, DL, DestVT,
+ {OpValues[0], Check, OpValues[2], OpValues[3]});
+ setValue(&VPIntrin, V);
+ return;
+ }
case ISD::VP_INTTOPTR: {
SDValue N = OpValues[0];
EVT DestVT = TLI.getValueType(DAG.getDataLayout(), VPIntrin.getType());
@@ -8660,6 +8856,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (visitUnaryFloatCall(I, ISD::FEXP2))
return;
break;
+ case LibFunc_exp10:
+ case LibFunc_exp10f:
+ case LibFunc_exp10l:
+ if (visitUnaryFloatCall(I, ISD::FEXP10))
+ return;
+ break;
case LibFunc_ldexp:
case LibFunc_ldexpf:
case LibFunc_ldexpl:
@@ -8955,11 +9157,11 @@ findMatchingInlineAsmOperand(unsigned OperandNo,
// Advance to the next operand.
unsigned OpFlag =
cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
- assert((InlineAsm::isRegDefKind(OpFlag) ||
- InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
- InlineAsm::isMemKind(OpFlag)) &&
- "Skipped past definitions?");
- CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1;
+ const InlineAsm::Flag F(OpFlag);
+ assert(
+ (F.isRegDefKind() || F.isRegDefEarlyClobberKind() || F.isMemKind()) &&
+ "Skipped past definitions?");
+ CurOp += F.getNumOperandRegisters() + 1;
}
return CurOp;
}
@@ -9217,14 +9419,14 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
- unsigned ConstraintID =
+ const InlineAsm::ConstraintCode ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
- assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ assert(ConstraintID != InlineAsm::ConstraintCode::Unknown &&
"Failed to convert memory constraint code to constraint id.");
// Add information to the INLINEASM node to know about this output.
- unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
- OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
+ InlineAsm::Flag OpFlags(InlineAsm::Kind::Mem, 1);
+ OpFlags.setMemConstraint(ConstraintID);
AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(),
MVT::i32));
AsmNodeOperands.push_back(OpInfo.CallOperand);
@@ -9245,8 +9447,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
// Add information to the INLINEASM node to know that this register is
// set.
OpInfo.AssignedRegs.AddInlineAsmOperands(
- OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber
- : InlineAsm::Kind_RegDef,
+ OpInfo.isEarlyClobber ? InlineAsm::Kind::RegDefEarlyClobber
+ : InlineAsm::Kind::RegDef,
false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
}
break;
@@ -9260,11 +9462,9 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
// just use its register.
auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(),
AsmNodeOperands);
- unsigned OpFlag =
- cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
- if (InlineAsm::isRegDefKind(OpFlag) ||
- InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
- // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
+ InlineAsm::Flag Flag(
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue());
+ if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
if (OpInfo.isIndirect) {
// This happens on gcc/testsuite/gcc.dg/pr8788-1.c
emitInlineAsmError(Call, "inline asm not supported yet: "
@@ -9284,8 +9484,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
TiedReg.isVirtual() ? MRI.getRegClass(TiedReg)
: RegVT != MVT::Untyped ? TLI.getRegClassFor(RegVT)
: TRI.getMinimalPhysRegClass(TiedReg);
- unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
- for (unsigned i = 0; i != NumRegs; ++i)
+ for (unsigned i = 0, e = Flag.getNumOperandRegisters(); i != e; ++i)
Regs.push_back(MRI.createVirtualRegister(RC));
RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType());
@@ -9293,22 +9492,21 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
SDLoc dl = getCurSDLoc();
// Use the produced MatchedRegs object to copy the operand value into the
// matched registers.
MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Glue, &Call);
- MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
- true, OpInfo.getMatchedOperand(), dl,
- DAG, AsmNodeOperands);
+ MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind::RegUse, true,
+ OpInfo.getMatchedOperand(), dl, DAG,
+ AsmNodeOperands);
break;
}
- assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
- assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
+ assert(Flag.isMemKind() && "Unknown matching constraint!");
+ assert(Flag.getNumOperandRegisters() == 1 &&
"Unexpected number of operands");
// Add information to the INLINEASM node to know about this input.
// See InlineAsm.h isUseOperandTiedToDef.
- OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag);
- OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
- OpInfo.getMatchedOperand());
+ Flag.clearMemConstraint();
+ Flag.setMatchingOp(OpInfo.getMatchedOperand());
AsmNodeOperands.push_back(DAG.getTargetConstant(
- OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
+ Flag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
break;
}
@@ -9338,8 +9536,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
}
// Add information to the INLINEASM node to know about this input.
- unsigned ResOpType =
- InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
+ InlineAsm::Flag ResOpType(InlineAsm::Kind::Imm, Ops.size());
AsmNodeOperands.push_back(DAG.getTargetConstant(
ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
llvm::append_range(AsmNodeOperands, Ops);
@@ -9354,14 +9551,14 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
TLI.getPointerTy(DAG.getDataLayout()) &&
"Memory operands expect pointer values");
- unsigned ConstraintID =
+ const InlineAsm::ConstraintCode ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
- assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ assert(ConstraintID != InlineAsm::ConstraintCode::Unknown &&
"Failed to convert memory constraint code to constraint id.");
// Add information to the INLINEASM node to know about this input.
- unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
- ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
+ InlineAsm::Flag ResOpType(InlineAsm::Kind::Mem, 1);
+ ResOpType.setMemConstraint(ConstraintID);
AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
getCurSDLoc(),
MVT::i32));
@@ -9370,24 +9567,24 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
}
if (OpInfo.ConstraintType == TargetLowering::C_Address) {
- unsigned ConstraintID =
+ const InlineAsm::ConstraintCode ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
- assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ assert(ConstraintID != InlineAsm::ConstraintCode::Unknown &&
"Failed to convert memory constraint code to constraint id.");
- unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ InlineAsm::Flag ResOpType(InlineAsm::Kind::Mem, 1);
SDValue AsmOp = InOperandVal;
if (isFunction(InOperandVal)) {
auto *GA = cast<GlobalAddressSDNode>(InOperandVal);
- ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Func, 1);
+ ResOpType = InlineAsm::Flag(InlineAsm::Kind::Func, 1);
AsmOp = DAG.getTargetGlobalAddress(GA->getGlobal(), getCurSDLoc(),
InOperandVal.getValueType(),
GA->getOffset());
}
// Add information to the INLINEASM node to know about this input.
- ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
+ ResOpType.setMemConstraint(ConstraintID);
AsmNodeOperands.push_back(
DAG.getTargetConstant(ResOpType, getCurSDLoc(), MVT::i32));
@@ -9425,15 +9622,15 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Glue,
&Call);
- OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
- dl, DAG, AsmNodeOperands);
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind::RegUse, false,
+ 0, dl, DAG, AsmNodeOperands);
break;
}
case InlineAsm::isClobber:
// Add the clobbered value to the operand list, so that the register
// allocator is aware that the physreg got clobbered.
if (!OpInfo.AssignedRegs.Regs.empty())
- OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind::Clobber,
false, 0, getCurSDLoc(), DAG,
AsmNodeOperands);
break;
@@ -9626,7 +9823,7 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
const Instruction &I,
SDValue Op) {
- const MDNode *Range = I.getMetadata(LLVMContext::MD_range);
+ const MDNode *Range = getRangeMetadata(I);
if (!Range)
return Op;
@@ -9670,7 +9867,7 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
void SelectionDAGBuilder::populateCallLoweringInfo(
TargetLowering::CallLoweringInfo &CLI, const CallBase *Call,
unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
- bool IsPatchPoint) {
+ AttributeSet RetAttrs, bool IsPatchPoint) {
TargetLowering::ArgListTy Args;
Args.reserve(NumArgs);
@@ -9691,7 +9888,8 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
- .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
+ .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args),
+ RetAttrs)
.setDiscardResult(Call->use_empty())
.setIsPatchPoint(IsPatchPoint)
.setIsPreallocated(
@@ -9840,7 +10038,7 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB,
TargetLowering::CallLoweringInfo CLI(DAG);
populateCallLoweringInfo(CLI, &CB, NumMetaOpers, NumCallArgs, Callee,
- ReturnTy, true);
+ ReturnTy, CB.getAttributes().getRetAttrs(), true);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
SDNode *CallEnd = Result.second.getNode();
@@ -11234,7 +11432,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
}
}
- if (TM.getOptLevel() != CodeGenOpt::None) {
+ if (TM.getOptLevel() != CodeGenOptLevel::None) {
// Here, we order cases by probability so the most likely case will be
// checked first. However, two clusters can have the same probability in
// which case their relative ordering is non-deterministic. So we use Low
@@ -11310,8 +11508,32 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
}
}
- if (FallthroughUnreachable)
- JTH->FallthroughUnreachable = true;
+ // If the default clause is unreachable, propagate that knowledge into
+ // JTH->FallthroughUnreachable which will use it to suppress the range
+ // check.
+ //
+ // However, don't do this if we're doing branch target enforcement,
+ // because a table branch _without_ a range check can be a tempting JOP
+ // gadget - out-of-bounds inputs that are impossible in correct
+ // execution become possible again if an attacker can influence the
+ // control flow. So if an attacker doesn't already have a BTI bypass
+ // available, we don't want them to be able to get one out of this
+ // table branch.
+ if (FallthroughUnreachable) {
+ Function &CurFunc = CurMF->getFunction();
+ bool HasBranchTargetEnforcement = false;
+ if (CurFunc.hasFnAttribute("branch-target-enforcement")) {
+ HasBranchTargetEnforcement =
+ CurFunc.getFnAttribute("branch-target-enforcement")
+ .getValueAsBool();
+ } else {
+ HasBranchTargetEnforcement =
+ CurMF->getMMI().getModule()->getModuleFlag(
+ "branch-target-enforcement");
+ }
+ if (!HasBranchTargetEnforcement)
+ JTH->FallthroughUnreachable = true;
+ }
if (!JTH->FallthroughUnreachable)
addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
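As a reading aid, here is a self-contained sketch of the precedence rule the guard above encodes: a per-function "branch-target-enforcement" attribute, when present, wins over the module flag of the same name. The helper name is hypothetical; the attribute/flag spellings and APIs are the ones the hunk itself uses.

#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Hypothetical helper restating the lookup order from the hunk above.
static bool hasBranchTargetEnforcement(const Function &F) {
  if (F.hasFnAttribute("branch-target-enforcement"))
    return F.getFnAttribute("branch-target-enforcement").getValueAsBool();
  // No function-level override: fall back to the module flag, where mere
  // presence of the flag metadata counts as "enabled", as in the hunk.
  return F.getParent()->getModuleFlag("branch-target-enforcement") != nullptr;
}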
@@ -11568,7 +11790,7 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
// Don't perform if there is only one cluster or optimizing for size.
if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
- TM.getOptLevel() == CodeGenOpt::None ||
+ TM.getOptLevel() == CodeGenOptLevel::None ||
SwitchMBB->getParent()->getFunction().hasMinSize())
return SwitchMBB;
@@ -11652,7 +11874,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
return;
}
- SL->findJumpTables(Clusters, &SI, DefaultMBB, DAG.getPSI(), DAG.getBFI());
+ SL->findJumpTables(Clusters, &SI, getCurSDLoc(), DefaultMBB, DAG.getPSI(),
+ DAG.getBFI());
SL->findBitTestClusters(Clusters, &SI);
LLVM_DEBUG({
@@ -11690,7 +11913,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
SwitchWorkListItem W = WorkList.pop_back_val();
unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
- if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
+ if (NumClusters > 3 && TM.getOptLevel() != CodeGenOptLevel::None &&
!DefaultMBB->getParent()->getFunction().hasMinSize()) {
// For optimized builds, lower large range as a balanced binary tree.
splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index f2496f24973a..5b55c3461b0b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -106,54 +106,39 @@ class SelectionDAGBuilder {
/// Helper type for DanglingDebugInfoMap.
class DanglingDebugInfo {
- using DbgValTy = const DbgValueInst *;
- using VarLocTy = const VarLocInfo *;
- PointerUnion<DbgValTy, VarLocTy> Info;
unsigned SDNodeOrder = 0;
public:
+ DILocalVariable *Variable;
+ DIExpression *Expression;
+ DebugLoc dl;
DanglingDebugInfo() = default;
- DanglingDebugInfo(const DbgValueInst *DI, unsigned SDNO)
- : Info(DI), SDNodeOrder(SDNO) {}
- DanglingDebugInfo(const VarLocInfo *VarLoc, unsigned SDNO)
- : Info(VarLoc), SDNodeOrder(SDNO) {}
+ DanglingDebugInfo(DILocalVariable *Var, DIExpression *Expr, DebugLoc DL,
+ unsigned SDNO)
+ : SDNodeOrder(SDNO), Variable(Var), Expression(Expr),
+ dl(std::move(DL)) {}
- DILocalVariable *getVariable(const FunctionVarLocs *Locs) const {
- if (isa<VarLocTy>(Info))
- return Locs->getDILocalVariable(cast<VarLocTy>(Info)->VariableID);
- return cast<DbgValTy>(Info)->getVariable();
- }
- DIExpression *getExpression() const {
- if (isa<VarLocTy>(Info))
- return cast<VarLocTy>(Info)->Expr;
- return cast<DbgValTy>(Info)->getExpression();
- }
- Value *getVariableLocationOp(unsigned Idx) const {
- assert(Idx == 0 && "Dangling variadic debug values not supported yet");
- if (isa<VarLocTy>(Info))
- return cast<VarLocTy>(Info)->Values.getVariableLocationOp(Idx);
- return cast<DbgValTy>(Info)->getVariableLocationOp(Idx);
- }
- DebugLoc getDebugLoc() const {
- if (isa<VarLocTy>(Info))
- return cast<VarLocTy>(Info)->DL;
- return cast<DbgValTy>(Info)->getDebugLoc();
- }
+ DILocalVariable *getVariable() const { return Variable; }
+ DIExpression *getExpression() const { return Expression; }
+ DebugLoc getDebugLoc() const { return dl; }
unsigned getSDNodeOrder() const { return SDNodeOrder; }
/// Helper for printing DanglingDebugInfo. This hoop-jumping is to
- /// accommodate the fact that an argument is required for getVariable.
+ /// store a Value pointer, so that we can print a whole DDI as one object.
/// Call SelectionDAGBuilder::printDDI instead of using directly.
struct Print {
- Print(const DanglingDebugInfo &DDI, const FunctionVarLocs *VarLocs)
- : DDI(DDI), VarLocs(VarLocs) {}
+ Print(const Value *V, const DanglingDebugInfo &DDI) : V(V), DDI(DDI) {}
+ const Value *V;
const DanglingDebugInfo &DDI;
- const FunctionVarLocs *VarLocs;
friend raw_ostream &operator<<(raw_ostream &OS,
const DanglingDebugInfo::Print &P) {
- OS << "DDI(var=" << *P.DDI.getVariable(P.VarLocs)
- << ", val= " << *P.DDI.getVariableLocationOp(0)
- << ", expr=" << *P.DDI.getExpression()
+ OS << "DDI(var=" << *P.DDI.getVariable();
+ if (P.V)
+ OS << ", val=" << *P.V;
+ else
+ OS << ", val=nullptr";
+
+ OS << ", expr=" << *P.DDI.getExpression()
<< ", order=" << P.DDI.getSDNodeOrder()
<< ", loc=" << P.DDI.getDebugLoc() << ")";
return OS;
@@ -164,8 +149,9 @@ class SelectionDAGBuilder {
/// Returns an object that defines `raw_ostream &operator<<` for printing.
/// Usage example:
  //// errs() << printDDI(MyValue, MyDanglingInfo) << " is dangling\n";
- DanglingDebugInfo::Print printDDI(const DanglingDebugInfo &DDI) {
- return DanglingDebugInfo::Print(DDI, DAG.getFunctionVarLocs());
+ DanglingDebugInfo::Print printDDI(const Value *V,
+ const DanglingDebugInfo &DDI) {
+ return DanglingDebugInfo::Print(V, DDI);
}
/// Helper type for DanglingDebugInfoMap.
@@ -295,10 +281,10 @@ public:
LLVMContext *Context = nullptr;
SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
- SwiftErrorValueTracking &swifterror, CodeGenOpt::Level ol)
+ SwiftErrorValueTracking &swifterror, CodeGenOptLevel ol)
: SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag),
- SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo),
- SwiftError(swifterror) {}
+ SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)),
+ FuncInfo(funcinfo), SwiftError(swifterror) {}
void init(GCFunctionInfo *gfi, AAResults *AA, AssumptionCache *AC,
const TargetLibraryInfo *li);
@@ -344,6 +330,7 @@ public:
ISD::NodeType ExtendType = ISD::ANY_EXTEND);
void visit(const Instruction &I);
+ void visitDbgInfo(const Instruction &I);
void visit(unsigned Opcode, const User &I);
@@ -352,8 +339,9 @@ public:
SDValue getCopyFromRegs(const Value *V, Type *Ty);
/// Register a dbg_value which relies on a Value which we have not yet seen.
- void addDanglingDebugInfo(const DbgValueInst *DI, unsigned Order);
- void addDanglingDebugInfo(const VarLocInfo *VarLoc, unsigned Order);
+ void addDanglingDebugInfo(SmallVectorImpl<Value *> &Values,
+ DILocalVariable *Var, DIExpression *Expr,
+ bool IsVariadic, DebugLoc DL, unsigned Order);
/// If we have dangling debug info that describes \p Variable, or an
/// overlapping part of variable considering the \p Expr, then this method
@@ -368,7 +356,7 @@ public:
/// For the given dangling debuginfo record, perform last-ditch efforts to
/// resolve the debuginfo to something that is represented in this DAG. If
/// this cannot be done, produce an Undef debug value record.
- void salvageUnresolvedDbgValue(DanglingDebugInfo &DDI);
+ void salvageUnresolvedDbgValue(const Value *V, DanglingDebugInfo &DDI);
/// For a given list of Values, attempt to create and record a SDDbgValue in
/// the SelectionDAG.
@@ -426,7 +414,8 @@ public:
void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI,
const CallBase *Call, unsigned ArgIdx,
unsigned NumArgs, SDValue Callee,
- Type *ReturnTy, bool IsPatchPoint);
+ Type *ReturnTy, AttributeSet RetAttrs,
+ bool IsPatchPoint);
std::pair<SDValue, SDValue>
lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
@@ -625,6 +614,8 @@ private:
void visitInlineAsm(const CallBase &Call,
const BasicBlock *EHPadBB = nullptr);
+
+ bool visitEntryValueDbgValue(const DbgValueInst &I);
void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
@@ -785,7 +776,7 @@ struct RegsForValue {
/// Add this value to the specified inlineasm node operand list. This adds the
/// code marker, matching input operand index (if applicable), and includes
/// the number of values added into it.
- void AddInlineAsmOperands(unsigned Code, bool HasMatching,
+ void AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching,
unsigned MatchingIdx, const SDLoc &dl,
SelectionDAG &DAG, std::vector<SDValue> &Ops) const;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 03a1ead5bbb4..78cc60084068 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -125,6 +125,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
case ISD::FrameIndex: return "FrameIndex";
case ISD::JumpTable: return "JumpTable";
+ case ISD::JUMP_TABLE_DEBUG_INFO:
+ return "JUMP_TABLE_DEBUG_INFO";
case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
case ISD::RETURNADDR: return "RETURNADDR";
case ISD::ADDROFRETURNADDR: return "ADDROFRETURNADDR";
@@ -222,6 +224,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STRICT_FEXP: return "strict_fexp";
case ISD::FEXP2: return "fexp2";
case ISD::STRICT_FEXP2: return "strict_fexp2";
+ case ISD::FEXP10: return "fexp10";
case ISD::FLOG: return "flog";
case ISD::STRICT_FLOG: return "strict_flog";
case ISD::FLOG2: return "flog2";
@@ -439,6 +442,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::RESET_FPENV: return "reset_fpenv";
case ISD::GET_FPENV_MEM: return "get_fpenv_mem";
case ISD::SET_FPENV_MEM: return "set_fpenv_mem";
+ case ISD::GET_FPMODE: return "get_fpmode";
+ case ISD::SET_FPMODE: return "set_fpmode";
+ case ISD::RESET_FPMODE: return "reset_fpmode";
// Bit manipulation
case ISD::ABS: return "abs";
@@ -591,6 +597,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (getFlags().hasExact())
OS << " exact";
+ if (getFlags().hasNonNeg())
+ OS << " nneg";
+
if (getFlags().hasNoNaNs())
OS << " nnan";
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 35abd990f968..2018b5f0ee29 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -78,6 +78,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PrintPasses.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
@@ -113,6 +114,7 @@
using namespace llvm;
#define DEBUG_TYPE "isel"
+#define ISEL_DUMP_DEBUG_TYPE DEBUG_TYPE "-dump"
STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected");
@@ -180,6 +182,19 @@ static const bool ViewDAGCombine1 = false, ViewLegalizeTypesDAGs = false,
ViewSchedDAGs = false, ViewSUnitDAGs = false;
#endif
+#ifndef NDEBUG
+#define ISEL_DUMP(X) \
+ do { \
+ if (llvm::DebugFlag && \
+ (isCurrentDebugType(DEBUG_TYPE) || \
+ (isCurrentDebugType(ISEL_DUMP_DEBUG_TYPE) && MatchFilterFuncName))) { \
+ X; \
+ } \
+ } while (false)
+#else
+#define ISEL_DUMP(X) do { } while (false)
+#endif
+
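(Usage note, assuming the standard plumbing behind isCurrentDebugType and isFunctionInPrintList: the per-stage DAG dumps below move behind the `isel-dump` debug type, so an invocation like `llc -debug-only=isel-dump -filter-print-funcs=foo` should emit the dumps only while selecting `foo`, while plain `-debug-only=isel` keeps the previous behaviour.)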
//===---------------------------------------------------------------------===//
///
/// RegisterScheduler class - Track the registration of instruction schedulers.
@@ -204,6 +219,16 @@ static RegisterScheduler
defaultListDAGScheduler("default", "Best scheduler for the target",
createDefaultScheduler);
+static bool dontUseFastISelFor(const Function &Fn) {
+ // Don't enable FastISel for functions with swiftasync Arguments.
+ // Debug info on those is reliant on good Argument lowering, and FastISel is
+  // not capable of lowering the entire function. Mixing the two selectors tends
+ // to result in poor lowering of Arguments.
+ return any_of(Fn.args(), [](const Argument &Arg) {
+ return Arg.hasAttribute(Attribute::AttrKind::SwiftAsync);
+ });
+}
+
namespace llvm {
//===--------------------------------------------------------------------===//
@@ -211,29 +236,31 @@ namespace llvm {
/// the optimization level on a per-function basis.
class OptLevelChanger {
SelectionDAGISel &IS;
- CodeGenOpt::Level SavedOptLevel;
+ CodeGenOptLevel SavedOptLevel;
bool SavedFastISel;
public:
- OptLevelChanger(SelectionDAGISel &ISel,
- CodeGenOpt::Level NewOptLevel) : IS(ISel) {
+ OptLevelChanger(SelectionDAGISel &ISel, CodeGenOptLevel NewOptLevel)
+ : IS(ISel) {
SavedOptLevel = IS.OptLevel;
SavedFastISel = IS.TM.Options.EnableFastISel;
- if (NewOptLevel == SavedOptLevel)
- return;
- IS.OptLevel = NewOptLevel;
- IS.TM.setOptLevel(NewOptLevel);
- LLVM_DEBUG(dbgs() << "\nChanging optimization level for Function "
- << IS.MF->getFunction().getName() << "\n");
- LLVM_DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O"
- << NewOptLevel << "\n");
- if (NewOptLevel == CodeGenOpt::None) {
- IS.TM.setFastISel(IS.TM.getO0WantsFastISel());
- LLVM_DEBUG(
- dbgs() << "\tFastISel is "
- << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled")
- << "\n");
+ if (NewOptLevel != SavedOptLevel) {
+ IS.OptLevel = NewOptLevel;
+ IS.TM.setOptLevel(NewOptLevel);
+ LLVM_DEBUG(dbgs() << "\nChanging optimization level for Function "
+ << IS.MF->getFunction().getName() << "\n");
+ LLVM_DEBUG(dbgs() << "\tBefore: -O" << static_cast<int>(SavedOptLevel)
+ << " ; After: -O" << static_cast<int>(NewOptLevel)
+ << "\n");
+ if (NewOptLevel == CodeGenOptLevel::None)
+ IS.TM.setFastISel(IS.TM.getO0WantsFastISel());
}
+ if (dontUseFastISelFor(IS.MF->getFunction()))
+ IS.TM.setFastISel(false);
+ LLVM_DEBUG(
+ dbgs() << "\tFastISel is "
+ << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled")
+ << "\n");
}
~OptLevelChanger() {
@@ -241,8 +268,8 @@ namespace llvm {
return;
LLVM_DEBUG(dbgs() << "\nRestoring optimization level for Function "
<< IS.MF->getFunction().getName() << "\n");
- LLVM_DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel << " ; After: -O"
- << SavedOptLevel << "\n");
+ LLVM_DEBUG(dbgs() << "\tBefore: -O" << static_cast<int>(IS.OptLevel)
+ << " ; After: -O" << static_cast<int>(SavedOptLevel) << "\n");
IS.OptLevel = SavedOptLevel;
IS.TM.setOptLevel(SavedOptLevel);
IS.TM.setFastISel(SavedFastISel);
@@ -252,8 +279,8 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// createDefaultScheduler - This creates an instruction scheduler appropriate
/// for the target.
- ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS,
- CodeGenOpt::Level OptLevel) {
+ ScheduleDAGSDNodes *createDefaultScheduler(SelectionDAGISel *IS,
+ CodeGenOptLevel OptLevel) {
const TargetLowering *TLI = IS->TLI;
const TargetSubtargetInfo &ST = IS->MF->getSubtarget();
@@ -262,7 +289,7 @@ namespace llvm {
return SchedulerCtor(IS, OptLevel);
}
- if (OptLevel == CodeGenOpt::None ||
+ if (OptLevel == CodeGenOptLevel::None ||
(ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) ||
TLI->getSchedulingPreference() == Sched::Source)
return createSourceListDAGScheduler(IS, OptLevel);
@@ -315,7 +342,7 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
//===----------------------------------------------------------------------===//
SelectionDAGISel::SelectionDAGISel(char &ID, TargetMachine &tm,
- CodeGenOpt::Level OL)
+ CodeGenOptLevel OL)
: MachineFunctionPass(ID), TM(tm), FuncInfo(new FunctionLoweringInfo()),
SwiftError(new SwiftErrorValueTracking()),
CurDAG(new SelectionDAG(tm, OL)),
@@ -335,23 +362,23 @@ SelectionDAGISel::~SelectionDAGISel() {
}
void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
- if (OptLevel != CodeGenOpt::None)
- AU.addRequired<AAResultsWrapperPass>();
+ if (OptLevel != CodeGenOptLevel::None)
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<GCModuleInfo>();
AU.addRequired<StackProtector>();
AU.addPreserved<GCModuleInfo>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
- if (UseMBPI && OptLevel != CodeGenOpt::None)
- AU.addRequired<BranchProbabilityInfoWrapperPass>();
+ if (UseMBPI && OptLevel != CodeGenOptLevel::None)
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
// AssignmentTrackingAnalysis only runs if assignment tracking is enabled for
// the module.
AU.addRequired<AssignmentTrackingAnalysis>();
AU.addPreserved<AssignmentTrackingAnalysis>();
- if (OptLevel != CodeGenOpt::None)
- LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
+ if (OptLevel != CodeGenOptLevel::None)
+ LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -391,6 +418,13 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
const Function &Fn = mf.getFunction();
MF = &mf;
+#ifndef NDEBUG
+ StringRef FuncName = Fn.getName();
+ MatchFilterFuncName = isFunctionInPrintList(FuncName);
+#else
+ (void)MatchFilterFuncName;
+#endif
+
// Decide what flavour of variable location debug-info will be used, before
// we change the optimisation level.
bool InstrRef = mf.shouldUseDebugInstrRef();
@@ -403,9 +437,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// it wants to look at it.
TM.resetTargetOptions(Fn);
// Reset OptLevel to None for optnone functions.
- CodeGenOpt::Level NewOptLevel = OptLevel;
- if (OptLevel != CodeGenOpt::None && skipFunction(Fn))
- NewOptLevel = CodeGenOpt::None;
+ CodeGenOptLevel NewOptLevel = OptLevel;
+ if (OptLevel != CodeGenOptLevel::None && skipFunction(Fn))
+ NewOptLevel = CodeGenOptLevel::None;
OptLevelChanger OLC(*this, NewOptLevel);
TII = MF->getSubtarget().getInstrInfo();
@@ -417,14 +451,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(mf.getFunction());
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
BlockFrequencyInfo *BFI = nullptr;
- if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOpt::None)
+ if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOptLevel::None)
BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
FunctionVarLocs const *FnVarLocs = nullptr;
if (isAssignmentTrackingEnabled(*Fn.getParent()))
FnVarLocs = getAnalysis<AssignmentTrackingAnalysis>().getResults();
- LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
+ ISEL_DUMP(dbgs() << "\n\n\n=== " << FuncName << "\n");
UniformityInfo *UA = nullptr;
if (auto *UAPass = getAnalysisIfAvailable<UniformityInfoWrapperPass>())
@@ -438,12 +472,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// into account). That's unfortunate but OK because it just means we won't
// ask for passes that have been required anyway.
- if (UseMBPI && OptLevel != CodeGenOpt::None)
+ if (UseMBPI && OptLevel != CodeGenOptLevel::None)
FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
else
FuncInfo->BPI = nullptr;
- if (OptLevel != CodeGenOpt::None)
+ if (OptLevel != CodeGenOptLevel::None)
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
else
AA = nullptr;
@@ -456,7 +490,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// We split CSR if the target supports it for the given function
// and the function has only return exits.
- if (OptLevel != CodeGenOpt::None && TLI->supportSplitCSR(MF)) {
+ if (OptLevel != CodeGenOptLevel::None && TLI->supportSplitCSR(MF)) {
FuncInfo->SplitCSR = true;
// Collect all the return blocks.
@@ -656,8 +690,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// at this point.
FuncInfo->clear();
- LLVM_DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n");
- LLVM_DEBUG(MF->print(dbgs()));
+ ISEL_DUMP(dbgs() << "*** MachineFunction at end of ISel ***\n");
+ ISEL_DUMP(MF->print(dbgs()));
return true;
}
@@ -685,10 +719,13 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
CurDAG->NewNodesMustHaveLegalTypes = false;
// Lower the instructions. If a call is emitted as a tail call, cease emitting
- // nodes for this block.
+ // nodes for this block. If an instruction is elided, don't emit it, but do
+ // handle any debug-info attached to it.
for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
if (!ElidedArgCopyInstrs.count(&*I))
SDB->visit(*I);
+ else
+ SDB->visitDbgInfo(*I);
}
// Make sure the root of the DAG is up-to-date.
@@ -765,10 +802,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
BlockName =
(MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str();
}
- LLVM_DEBUG(dbgs() << "Initial selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ ISEL_DUMP(dbgs() << "\nInitial selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
@@ -785,10 +822,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
}
- LLVM_DEBUG(dbgs() << "Optimized lowered selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ ISEL_DUMP(dbgs() << "\nOptimized lowered selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
@@ -807,10 +844,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
Changed = CurDAG->LegalizeTypes();
}
- LLVM_DEBUG(dbgs() << "Type-legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ ISEL_DUMP(dbgs() << "\nType-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
@@ -831,10 +868,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
}
- LLVM_DEBUG(dbgs() << "Optimized type-legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ ISEL_DUMP(dbgs() << "\nOptimized type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
@@ -849,10 +886,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
}
if (Changed) {
- LLVM_DEBUG(dbgs() << "Vector-legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ ISEL_DUMP(dbgs() << "\nVector-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
@@ -865,10 +902,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->LegalizeTypes();
}
- LLVM_DEBUG(dbgs() << "Vector/type-legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ ISEL_DUMP(dbgs() << "\nVector/type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
@@ -885,10 +922,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel);
}
- LLVM_DEBUG(dbgs() << "Optimized vector-legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ ISEL_DUMP(dbgs() << "\nOptimized vector-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
@@ -905,10 +942,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Legalize();
}
- LLVM_DEBUG(dbgs() << "Legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ ISEL_DUMP(dbgs() << "\nLegalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
@@ -925,17 +962,17 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
}
- LLVM_DEBUG(dbgs() << "Optimized legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ ISEL_DUMP(dbgs() << "\nOptimized legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
CurDAG->VerifyDAGDivergence();
#endif
- if (OptLevel != CodeGenOpt::None)
+ if (OptLevel != CodeGenOptLevel::None)
ComputeLiveOutVRegInfo();
if (ViewISelDAGs && MatchFilterBB)
@@ -949,10 +986,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
DoInstructionSelection();
}
- LLVM_DEBUG(dbgs() << "Selected selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ ISEL_DUMP(dbgs() << "\nSelected selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
if (ViewSchedDAGs && MatchFilterBB)
CurDAG->viewGraph("scheduler input for " + BlockName);
@@ -1357,6 +1394,8 @@ static bool processIfEntryValueDbgDeclare(FunctionLoweringInfo &FuncInfo,
// Find the corresponding livein physical register to this argument.
for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins())
if (VirtReg == ArgVReg) {
+ // Append an op deref to account for the fact that this is a dbg_declare.
+ Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
FuncInfo.MF->setVariableDbgInfo(Var, Expr, PhysReg, DbgLoc);
LLVM_DEBUG(dbgs() << "processDbgDeclare: setVariableDbgInfo Var=" << *Var
<< ", Expr=" << *Expr << ", MCRegister=" << PhysReg
@@ -1510,7 +1549,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Iterate over all basic blocks in the function.
StackProtector &SP = getAnalysis<StackProtector>();
for (const BasicBlock *LLVMBB : RPOT) {
- if (OptLevel != CodeGenOpt::None) {
+ if (OptLevel != CodeGenOptLevel::None) {
bool AllPredsVisited = true;
for (const BasicBlock *Pred : predecessors(LLVMBB)) {
if (!FuncInfo->VisitedBBs.count(Pred)) {
@@ -2074,41 +2113,43 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops,
--e; // Don't process a glue operand if it is here.
while (i != e) {
- unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
- if (!InlineAsm::isMemKind(Flags) && !InlineAsm::isFuncKind(Flags)) {
+ InlineAsm::Flag Flags(cast<ConstantSDNode>(InOps[i])->getZExtValue());
+ if (!Flags.isMemKind() && !Flags.isFuncKind()) {
// Just skip over this operand, copying the operands verbatim.
- Ops.insert(Ops.end(), InOps.begin()+i,
- InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1);
- i += InlineAsm::getNumOperandRegisters(Flags) + 1;
+ Ops.insert(Ops.end(), InOps.begin() + i,
+ InOps.begin() + i + Flags.getNumOperandRegisters() + 1);
+ i += Flags.getNumOperandRegisters() + 1;
} else {
- assert(InlineAsm::getNumOperandRegisters(Flags) == 1 &&
+ assert(Flags.getNumOperandRegisters() == 1 &&
"Memory operand with multiple values?");
unsigned TiedToOperand;
- if (InlineAsm::isUseOperandTiedToDef(Flags, TiedToOperand)) {
+ if (Flags.isUseOperandTiedToDef(TiedToOperand)) {
// We need the constraint ID from the operand this is tied to.
unsigned CurOp = InlineAsm::Op_FirstOperand;
- Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
+ Flags =
+ InlineAsm::Flag(cast<ConstantSDNode>(InOps[CurOp])->getZExtValue());
for (; TiedToOperand; --TiedToOperand) {
- CurOp += InlineAsm::getNumOperandRegisters(Flags)+1;
- Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
+ CurOp += Flags.getNumOperandRegisters() + 1;
+ Flags = InlineAsm::Flag(
+ cast<ConstantSDNode>(InOps[CurOp])->getZExtValue());
}
}
// Otherwise, this is a memory operand. Ask the target to select it.
std::vector<SDValue> SelOps;
- unsigned ConstraintID = InlineAsm::getMemoryConstraintID(Flags);
+ const InlineAsm::ConstraintCode ConstraintID =
+ Flags.getMemoryConstraintID();
if (SelectInlineAsmMemoryOperand(InOps[i+1], ConstraintID, SelOps))
report_fatal_error("Could not match memory address. Inline asm"
" failure!");
// Add this to the output node.
- unsigned NewFlags =
- InlineAsm::isMemKind(Flags)
- ? InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size())
- : InlineAsm::getFlagWord(InlineAsm::Kind_Func, SelOps.size());
- NewFlags = InlineAsm::getFlagWordForMem(NewFlags, ConstraintID);
- Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32));
+ Flags = InlineAsm::Flag(Flags.isMemKind() ? InlineAsm::Kind::Mem
+ : InlineAsm::Kind::Func,
+ SelOps.size());
+ Flags.setMemConstraint(ConstraintID);
+ Ops.push_back(CurDAG->getTargetConstant(Flags, DL, MVT::i32));
llvm::append_range(Ops, SelOps);
i += 2;
}
@@ -2176,18 +2217,20 @@ static bool findNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
/// operand node N of U during instruction selection that starts at Root.
bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
SDNode *Root) const {
- if (OptLevel == CodeGenOpt::None) return false;
+ if (OptLevel == CodeGenOptLevel::None)
+ return false;
return N.hasOneUse();
}
/// IsLegalToFold - Returns true if the specific operand node N of
/// U can be folded during instruction selection that starts at Root.
bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
- CodeGenOpt::Level OptLevel,
+ CodeGenOptLevel OptLevel,
bool IgnoreChains) {
- if (OptLevel == CodeGenOpt::None) return false;
+ if (OptLevel == CodeGenOptLevel::None)
+ return false;
- // If Root use can somehow reach N through a path that that doesn't contain
+ // If Root use can somehow reach N through a path that doesn't contain
// U then folding N would create a cycle. e.g. In the following
// diagram, Root can reach N through X. If N is folded into Root, then
// X is both a predecessor and a successor of U.
@@ -2435,6 +2478,13 @@ GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
return Val;
}
+void SelectionDAGISel::Select_JUMP_TABLE_DEBUG_INFO(SDNode *N) {
+ SDLoc dl(N);
+ CurDAG->SelectNodeTo(N, TargetOpcode::JUMP_TABLE_DEBUG_INFO, MVT::Glue,
+ CurDAG->getTargetConstant(N->getConstantOperandVal(1),
+ dl, MVT::i64, true));
+}
+
/// When a match is complete, this method updates uses of interior chain results
/// to use the new results.
void SelectionDAGISel::UpdateChains(
@@ -2591,7 +2641,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
unsigned ResNumResults = Res->getNumValues();
// Move the glue if needed.
if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 &&
- (unsigned)OldGlueResultNo != ResNumResults-1)
+ static_cast<unsigned>(OldGlueResultNo) != ResNumResults - 1)
ReplaceUses(SDValue(Node, OldGlueResultNo),
SDValue(Res, ResNumResults - 1));
@@ -2600,7 +2650,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
// Move the chain reference if needed.
if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
- (unsigned)OldChainResultNo != ResNumResults-1)
+ static_cast<unsigned>(OldChainResultNo) != ResNumResults - 1)
ReplaceUses(SDValue(Node, OldChainResultNo),
SDValue(Res, ResNumResults - 1));
@@ -2639,8 +2689,11 @@ LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildSame(
/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- const SelectionDAGISel &SDISel) {
- return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
+ const SelectionDAGISel &SDISel, bool TwoBytePredNo) {
+ unsigned PredNo = MatcherTable[MatcherIndex++];
+ if (TwoBytePredNo)
+ PredNo |= MatcherTable[MatcherIndex++] << 8;
+ return SDISel.CheckPatternPredicate(PredNo);
}
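A minimal worked example of the two-byte encoding this helper now accepts for OPC_CheckPatternPredicate2 (little-endian, low byte first); the table bytes here are hypothetical:

#include <cassert>

int main() {
  const unsigned char MatcherTable[] = {0x34, 0x12}; // hypothetical bytes
  unsigned MatcherIndex = 0;
  unsigned PredNo = MatcherTable[MatcherIndex++];
  PredNo |= MatcherTable[MatcherIndex++] << 8; // second byte is the high byte
  assert(PredNo == 0x1234); // predicate numbers up to 65535 now addressable
  return 0;
}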
/// CheckNodePredicate - Implements OP_CheckNodePredicate.
@@ -2654,15 +2707,17 @@ LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDNode *N) {
uint16_t Opc = MatcherTable[MatcherIndex++];
- Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ Opc |= static_cast<uint16_t>(MatcherTable[MatcherIndex++]) << 8;
return N->getOpcode() == Opc;
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
const TargetLowering *TLI, const DataLayout &DL) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
- if (N.getValueType() == VT) return true;
+ MVT::SimpleValueType VT =
+ static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
+ if (N.getValueType() == VT)
+ return true;
// Handle the case when VT is iPTR.
return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL);
@@ -2682,7 +2737,7 @@ LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
return cast<CondCodeSDNode>(N)->get() ==
- (ISD::CondCode)MatcherTable[MatcherIndex++];
+ static_cast<ISD::CondCode>(MatcherTable[MatcherIndex++]);
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
@@ -2696,7 +2751,8 @@ CheckChild2CondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering *TLI, const DataLayout &DL) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ MVT::SimpleValueType VT =
+ static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
if (cast<VTSDNode>(N)->getVT() == VT)
return true;
@@ -2788,7 +2844,10 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Same);
return Index;
case SelectionDAGISel::OPC_CheckPatternPredicate:
- Result = !::CheckPatternPredicate(Table, Index, SDISel);
+ case SelectionDAGISel::OPC_CheckPatternPredicate2:
+ Result = !::CheckPatternPredicate(
+ Table, Index, SDISel,
+ Table[Index - 1] == SelectionDAGISel::OPC_CheckPatternPredicate2);
return Index;
case SelectionDAGISel::OPC_CheckPredicate:
Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode());
@@ -2981,6 +3040,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::PATCHPOINT:
Select_PATCHPOINT(NodeToMatch);
return;
+ case ISD::JUMP_TABLE_DEBUG_INFO:
+ Select_JUMP_TABLE_DEBUG_INFO(NodeToMatch);
+ return;
}
assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
@@ -3042,7 +3104,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// Get the opcode, add the index to the table.
uint16_t Opc = MatcherTable[Idx++];
- Opc |= (unsigned short)MatcherTable[Idx++] << 8;
+ Opc |= static_cast<uint16_t>(MatcherTable[Idx++]) << 8;
if (Opc >= OpcodeOffset.size())
OpcodeOffset.resize((Opc+1)*2);
OpcodeOffset[Opc] = Idx;
@@ -3059,7 +3121,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
#ifndef NDEBUG
unsigned CurrentOpcodeIndex = MatcherIndex;
#endif
- BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++];
+ BuiltinOpcodes Opcode =
+ static_cast<BuiltinOpcodes>(MatcherTable[MatcherIndex++]);
switch (Opcode) {
case OPC_Scope: {
// Okay, the semantics of this operation are that we should push a scope
@@ -3198,7 +3261,10 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
continue;
case OPC_CheckPatternPredicate:
- if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break;
+ case OPC_CheckPatternPredicate2:
+ if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this,
+ Opcode == OPC_CheckPatternPredicate2))
+ break;
continue;
case OPC_CheckPredicate:
if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this,
@@ -3265,7 +3331,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
if (CaseSize == 0) break;
uint16_t Opc = MatcherTable[MatcherIndex++];
- Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ Opc |= static_cast<uint16_t>(MatcherTable[MatcherIndex++]) << 8;
// If the opcode matches, then we will execute this case.
if (CurNodeOpcode == Opc)
@@ -3295,7 +3361,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
if (CaseSize == 0) break;
- MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ MVT CaseVT =
+ static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
if (CaseVT == MVT::iPTR)
CaseVT = TLI->getPointerTy(CurDAG->getDataLayout());
@@ -3390,22 +3457,43 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
continue;
}
case OPC_EmitInteger:
- case OPC_EmitStringInteger: {
- MVT::SimpleValueType VT =
- (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ case OPC_EmitInteger8:
+ case OPC_EmitInteger16:
+ case OPC_EmitInteger32:
+ case OPC_EmitInteger64:
+ case OPC_EmitStringInteger:
+ case OPC_EmitStringInteger32: {
+ MVT::SimpleValueType VT;
+ switch (Opcode) {
+ case OPC_EmitInteger8:
+ VT = MVT::i8;
+ break;
+ case OPC_EmitInteger16:
+ VT = MVT::i16;
+ break;
+ case OPC_EmitInteger32:
+ case OPC_EmitStringInteger32:
+ VT = MVT::i32;
+ break;
+ case OPC_EmitInteger64:
+ VT = MVT::i64;
+ break;
+ default:
+ VT = static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
+ break;
+ }
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
- if (Opcode == OPC_EmitInteger)
+ if (Opcode >= OPC_EmitInteger && Opcode <= OPC_EmitInteger64)
Val = decodeSignRotatedValue(Val);
- RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
- CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch),
- VT), nullptr));
+ RecordedNodes.push_back(std::pair<SDValue, SDNode *>(
+ CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch), VT), nullptr));
continue;
}
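For readers new to the matcher-table encodings in play here, a sketch of the decode path for these immediates, mirroring GetVBR above and (to the best of my reading) LLVM's decodeSignRotatedValue; treat it as an illustration with made-up table bytes, not a drop-in replacement:

#include <cassert>
#include <cstdint>

static uint64_t getVBR(uint64_t Val, const unsigned char *Table,
                       unsigned &Idx) {
  Val &= 127; // Strip the continuation bit of the byte already read.
  unsigned Shift = 7;
  uint64_t Next;
  do {
    Next = Table[Idx++];
    Val |= (Next & 127) << Shift;
    Shift += 7;
  } while (Next & 128);
  return Val;
}

static int64_t decodeSignRotated(uint64_t V) {
  if ((V & 1) == 0)
    return V >> 1;             // Non-negative: plain left-rotated value.
  if (V != 1)
    return -(int64_t)(V >> 1); // Negative: magnitude in the upper bits.
  return INT64_MIN;            // "-0" is reused to encode the minimum.
}

int main() {
  // Byte 0: -5 sign-rotated to 11 (fits in one byte, no VBR needed).
  // Bytes 1-2: the raw payload 300 VBR-encoded as 0xAC, 0x02.
  const unsigned char Table[] = {11, 0xAC, 0x02};
  unsigned Idx = 0;
  uint64_t Val = Table[Idx++];
  assert(decodeSignRotated(Val) == -5);
  Val = Table[Idx++];
  if (Val & 128)
    Val = getVBR(Val, Table, Idx);
  assert(Val == 300); // sign rotation applies only to OPC_EmitInteger*
  return 0;
}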
case OPC_EmitRegister: {
MVT::SimpleValueType VT =
- (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
unsigned RegNo = MatcherTable[MatcherIndex++];
RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
CurDAG->getRegister(RegNo, VT), nullptr));
@@ -3416,7 +3504,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// values are stored in two bytes in the matcher table (just like
// opcodes).
MVT::SimpleValueType VT =
- (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
unsigned RegNo = MatcherTable[MatcherIndex++];
RegNo |= MatcherTable[MatcherIndex++] << 8;
RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
@@ -3562,7 +3650,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case OPC_EmitNode0: case OPC_EmitNode1: case OPC_EmitNode2:
case OPC_MorphNodeTo0: case OPC_MorphNodeTo1: case OPC_MorphNodeTo2: {
uint16_t TargetOpc = MatcherTable[MatcherIndex++];
- TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ TargetOpc |= static_cast<uint16_t>(MatcherTable[MatcherIndex++]) << 8;
unsigned EmitNodeInfo = MatcherTable[MatcherIndex++];
// Get the result VT list.
unsigned NumVTs;
@@ -3577,7 +3665,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
SmallVector<EVT, 4> VTs;
for (unsigned i = 0; i != NumVTs; ++i) {
MVT::SimpleValueType VT =
- (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
if (VT == MVT::iPTR)
VT = TLI->getPointerTy(CurDAG->getDataLayout()).SimpleTy;
VTs.push_back(VT);
@@ -3667,7 +3755,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
auto &Chain = ChainNodesMatched;
assert((!E || !is_contained(Chain, N)) &&
"Chain node replaced during MorphNode");
- llvm::erase_value(Chain, N);
+ llvm::erase(Chain, N);
});
Res = cast<MachineSDNode>(MorphNode(NodeToMatch, TargetOpc, VTList,
Ops, EmitNodeInfo));
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 5afd05648772..cf32350036d4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -62,15 +62,15 @@ STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered");
STATISTIC(StatepointMaxSlotsRequired,
"Maximum number of stack slots required for a singe statepoint");
-cl::opt<bool> UseRegistersForDeoptValues(
+static cl::opt<bool> UseRegistersForDeoptValues(
"use-registers-for-deopt-values", cl::Hidden, cl::init(false),
cl::desc("Allow using registers for non pointer deopt args"));
-cl::opt<bool> UseRegistersForGCPointersInLandingPad(
+static cl::opt<bool> UseRegistersForGCPointersInLandingPad(
"use-registers-for-gc-values-in-landing-pad", cl::Hidden, cl::init(false),
cl::desc("Allow using registers for gc pointer in landing pad"));
-cl::opt<unsigned> MaxRegistersForGCPointers(
+static cl::opt<unsigned> MaxRegistersForGCPointers(
"max-registers-for-gc-values", cl::Hidden, cl::init(0),
cl::desc("Max number of VRegs allowed to pass GC pointer meta args in"));
@@ -525,7 +525,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// deopt argument length, deopt arguments.., gc arguments...
// Figure out what lowering strategy we're going to use for each part
- // Note: Is is conservatively correct to lower both "live-in" and "live-out"
+ // Note: It is conservatively correct to lower both "live-in" and "live-out"
// as "live-through". A "live-through" variable is one which is "live-in",
// "live-out", and live throughout the lifetime of the call (i.e. we can find
// it from any PC within the transitive callee of the statepoint). In
@@ -715,7 +715,8 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
assert((GFI || SI.Bases.empty()) &&
"No gc specified, so cannot relocate pointers!");
- LLVM_DEBUG(dbgs() << "Lowering statepoint " << *SI.StatepointInstr << "\n");
+ LLVM_DEBUG(if (SI.StatepointInstr) dbgs()
+ << "Lowering statepoint " << *SI.StatepointInstr << "\n");
#ifndef NDEBUG
for (const auto *Reloc : SI.GCRelocates)
if (Reloc->getParent() == SI.StatepointInstr->getParent())
@@ -1032,10 +1033,16 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I,
ActualCallee = Callee;
}
+ const auto GCResultLocality = getGCResultLocality(I);
+ AttributeSet retAttrs;
+ if (GCResultLocality.first)
+ retAttrs = GCResultLocality.first->getAttributes().getRetAttrs();
+
StatepointLoweringInfo SI(DAG);
populateCallLoweringInfo(SI.CLI, &I, GCStatepointInst::CallArgsBeginPos,
I.getNumCallArgs(), ActualCallee,
- I.getActualReturnType(), false /* IsPatchPoint */);
+ I.getActualReturnType(), retAttrs,
+ /*IsPatchPoint=*/false);
// There may be duplication in the gc.relocate list; such as two copies of
// each relocation on normal and exceptional path for an invoke. We only
@@ -1091,8 +1098,6 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I,
SDValue ReturnValue = LowerAsSTATEPOINT(SI);
// Export the result value if needed
- const auto GCResultLocality = getGCResultLocality(I);
-
if (!GCResultLocality.first && !GCResultLocality.second) {
// The return value is not needed, just generate a poison value.
// Note: This covers the void return case.
@@ -1137,7 +1142,7 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
populateCallLoweringInfo(
SI.CLI, Call, ArgBeginIndex, Call->arg_size(), Callee,
ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : Call->getType(),
- false);
+ Call->getAttributes().getRetAttrs(), /*IsPatchPoint=*/false);
if (!VarArgDisallowed)
SI.CLI.IsVarArg = Call->getFunctionType()->isVarArg();
@@ -1156,6 +1161,7 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
// NB! The GC arguments are deliberately left empty.
+ LLVM_DEBUG(dbgs() << "Lowering call with deopt bundle " << *Call << "\n");
if (SDValue ReturnVal = LowerAsSTATEPOINT(SI)) {
ReturnVal = lowerRangeToAssertZExt(DAG, *Call, ReturnVal);
setValue(Call, ReturnVal);
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a84d35a6ea4e..c5977546828f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -472,6 +473,17 @@ TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}
+SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
+ SDValue Addr, int JTI,
+ SelectionDAG &DAG) const {
+ SDValue Chain = Value;
+ // Jump table debug info is only needed if CodeView is enabled.
+ if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
+ Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
+ }
+ return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
+}
+
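(Annotation: this default implementation threads a JUMP_TABLE_DEBUG_INFO node into the chain ahead of the BRIND, and only on COFF, where CodeView consumes it; the node is then matched by the Select_JUMP_TABLE_DEBUG_INFO hook added in SelectionDAGISel.cpp above. Targets that override expandIndirectJTBranch presumably need to thread the new JTI parameter the same way.)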
bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
const TargetMachine &TM = getTargetMachine();
@@ -554,8 +566,9 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
}
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
-/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
-/// generalized for targets with other types of implicit widening casts.
+/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
+/// but it could be generalized for targets with other types of implicit
+/// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
const APInt &DemandedBits,
TargetLoweringOpt &TLO) const {
@@ -1040,13 +1053,10 @@ static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
// larger type size to do the transform.
if (!TLI.isOperationLegalOrCustom(AVGOpc, VT))
return SDValue();
-
- if (DAG.computeOverflowForAdd(IsSigned, Add.getOperand(0),
- Add.getOperand(1)) ==
- SelectionDAG::OFK_Never &&
- (!Add2 || DAG.computeOverflowForAdd(IsSigned, Add2.getOperand(0),
- Add2.getOperand(1)) ==
- SelectionDAG::OFK_Never))
+ if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
+ Add.getOperand(1)) &&
+ (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
+ Add2.getOperand(1))))
NVT = VT;
else
return SDValue();
@@ -1155,6 +1165,18 @@ bool TargetLowering::SimplifyDemandedBits(
// TODO: Call SimplifyDemandedBits for non-constant demanded elements.
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false; // Don't fall through, will infinitely loop.
+ case ISD::SPLAT_VECTOR: {
+ SDValue Scl = Op.getOperand(0);
+ APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
+ KnownBits KnownScl;
+ if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
+ return true;
+
+ // Implicitly truncate the bits to match the official semantics of
+ // SPLAT_VECTOR.
+ Known = KnownScl.trunc(BitWidth);
+ break;
+ }
case ISD::LOAD: {
auto *LD = cast<LoadSDNode>(Op);
if (getTargetConstantFromLoad(LD)) {
@@ -1765,8 +1787,17 @@ bool TargetLowering::SimplifyDemandedBits(
APInt InDemandedMask = DemandedBits.lshr(ShAmt);
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
- Depth + 1))
+ Depth + 1)) {
+ SDNodeFlags Flags = Op.getNode()->getFlags();
+ if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
+ // Disable the nsw and nuw flags. We can no longer guarantee that we
+ // won't wrap after simplification.
+ Flags.setNoSignedWrap(false);
+ Flags.setNoUnsignedWrap(false);
+ Op->setFlags(Flags);
+ }
return true;
+ }
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero <<= ShAmt;
Known.One <<= ShAmt;
@@ -1788,6 +1819,37 @@ bool TargetLowering::SimplifyDemandedBits(
if ((ShAmt < DemandedBits.getActiveBits()) &&
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
+
+ // Narrow shift to lower half - similar to ShrinkDemandedOp.
+ // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
+ // Only do this if we demand the upper half so the knownbits are correct.
+ unsigned HalfWidth = BitWidth / 2;
+ if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
+ DemandedBits.countLeadingOnes() >= HalfWidth) {
+ EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
+ if (isNarrowingProfitable(VT, HalfVT) &&
+ isTypeDesirableForOp(ISD::SHL, HalfVT) &&
+ isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
+ (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
+ // If we're demanding the upper bits at all, we must ensure
+ // that the upper bits of the shift result are known to be zero,
+ // which is equivalent to the narrow shift being NUW.
+ if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
+ bool IsNSW = Known.countMinSignBits() > HalfWidth;
+ SDNodeFlags Flags;
+ Flags.setNoSignedWrap(IsNSW);
+ Flags.setNoUnsignedWrap(IsNUW);
+ SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
+ SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
+ ShAmt, HalfVT, dl, TLO.LegalTypes());
+ SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
+ NewShiftAmt, Flags);
+ SDValue NewExt =
+ TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
+ return TLO.CombineTo(Op, NewExt);
+ }
+ }
+ }
} else {
// This is a variable shift, so we can't shift the demand mask by a known
// amount. But if we are not demanding high bits, then we are not
@@ -1870,15 +1932,15 @@ bool TargetLowering::SimplifyDemandedBits(
// Narrow shift to lower half - similar to ShrinkDemandedOp.
// (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
- if ((BitWidth % 2) == 0 && !VT.isVector() &&
- ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
- TLO.DAG.MaskedValueIsZero(
- Op0, APInt::getHighBitsSet(BitWidth, BitWidth / 2)))) {
+ if ((BitWidth % 2) == 0 && !VT.isVector()) {
+ APInt HiBits = APInt::getHighBitsSet(BitWidth, BitWidth / 2);
EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
if (isNarrowingProfitable(VT, HalfVT) &&
isTypeDesirableForOp(ISD::SRL, HalfVT) &&
isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
- (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT))) {
+ (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
+ ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
+ TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
ShAmt, HalfVT, dl, TLO.LegalTypes());
@@ -1945,6 +2007,35 @@ bool TargetLowering::SimplifyDemandedBits(
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
+ // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
+ // supports sext_inreg.
+ if (Op0.getOpcode() == ISD::SHL) {
+ if (const APInt *InnerSA =
+ TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
+ unsigned LowBits = BitWidth - ShAmt;
+ EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
+ if (VT.isVector())
+ ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
+ VT.getVectorElementCount());
+
+ if (*InnerSA == ShAmt) {
+ if (!TLO.LegalOperations() ||
+ getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) == Legal)
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
+ Op0.getOperand(0),
+ TLO.DAG.getValueType(ExtVT)));
+
+ // Even if we can't convert to sext_inreg, we might be able to
+ // remove this shift pair if the input is already sign extended.
+ unsigned NumSignBits =
+ TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
+ if (NumSignBits > ShAmt)
+ return TLO.CombineTo(Op, Op0.getOperand(0));
+ }
+ }
+ }
+
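A quick exhaustive check of the fold above on 8-bit values (illustrative, not from the patch): the shl/sra pair with equal amounts C behaves exactly like sign-extension from the low 8 - C bits. The second early-out also follows, since that sign-extension is the identity on any value that already has more than C sign bits.

#include <cassert>
#include <cstdint>

// Sign-extend the low FromBits bits of X using the mask/xor trick, so the
// check is not circularly defined in terms of the same shift pair.
static int8_t signExtendFromBits(uint8_t X, unsigned FromBits) {
  uint8_t SignBit = (uint8_t)(1u << (FromBits - 1));
  uint8_t Low = (uint8_t)(X & ((1u << FromBits) - 1));
  return (int8_t)((uint8_t)(Low ^ SignBit) - SignBit);
}

int main() {
  for (unsigned C = 1; C < 8; ++C)
    for (unsigned V = 0; V <= 255; ++V) {
      int8_t Shl = (int8_t)(uint8_t)(V << C); // shl with modular wrap
      int8_t Sra = (int8_t)(Shl >> C);        // arithmetic shift right
      assert(Sra == signExtendFromBits((uint8_t)V, 8 - C));
    }
  return 0;
}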
APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
@@ -2106,30 +2197,57 @@ bool TargetLowering::SimplifyDemandedBits(
}
break;
}
- case ISD::UMIN: {
- // Check if one arg is always less than (or equal) to the other arg.
- SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
- KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
- Known = KnownBits::umin(Known0, Known1);
- if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
- return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
- if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
- return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
- break;
- }
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
case ISD::UMAX: {
- // Check if one arg is always greater than (or equal) to the other arg.
+ unsigned Opc = Op.getOpcode();
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
+
+ // If we're only demanding signbits, then we can simplify to OR/AND node.
+ unsigned BitOp =
+ (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
+ unsigned NumSignBits =
+ std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
+ TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
+ unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
+ if (NumSignBits >= NumDemandedUpperBits)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
+
+ // Check if one arg is always less/greater than (or equal) to the other arg.
KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
- Known = KnownBits::umax(Known0, Known1);
- if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
- return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
- if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
- return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
+ switch (Opc) {
+ case ISD::SMIN:
+ if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
+ return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
+ if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
+ return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
+ Known = KnownBits::smin(Known0, Known1);
+ break;
+ case ISD::SMAX:
+ if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
+ return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
+ if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
+ return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
+ Known = KnownBits::smax(Known0, Known1);
+ break;
+ case ISD::UMIN:
+ if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
+ return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
+ if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
+ return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
+ Known = KnownBits::umin(Known0, Known1);
+ break;
+ case ISD::UMAX:
+ if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
+ return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
+ if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
+ return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
+ Known = KnownBits::umax(Known0, Known1);
+ break;
+ }
break;
}
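An exhaustive 8-bit check of the sign-bit claim above (illustrative): whenever both operands have at least K sign bits, the top K bits of smin/umax coincide with bitwise OR and those of smax/umin with bitwise AND, which is exactly the BitOp selection in the hunk.

#include <algorithm>
#include <cassert>
#include <cstdint>

// Count leading bits equal to the sign bit, including the sign bit itself.
static unsigned numSignBits(int8_t V) {
  unsigned N = 1;
  while (N < 8 && ((V >> (7 - N)) & 1) == ((V >> 7) & 1))
    ++N;
  return N;
}

int main() {
  for (int Xi = -128; Xi <= 127; ++Xi)
    for (int Yi = -128; Yi <= 127; ++Yi) {
      int8_t X = (int8_t)Xi, Y = (int8_t)Yi;
      unsigned K = std::min(numSignBits(X), numSignBits(Y));
      uint8_t Mask = (uint8_t)(0xFFu << (8 - K)); // demanded upper K bits
      uint8_t UX = (uint8_t)X, UY = (uint8_t)Y;
      assert(((uint8_t)std::min(X, Y) & Mask) == ((UX | UY) & Mask)); // smin
      assert(((uint8_t)std::max(X, Y) & Mask) == ((UX & UY) & Mask)); // smax
      assert((std::min(UX, UY) & Mask) == ((UX & UY) & Mask));        // umin
      assert((std::max(UX, UY) & Mask) == ((UX | UY) & Mask));        // umax
    }
  return 0;
}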
case ISD::BITREVERSE: {
@@ -2285,11 +2403,17 @@ bool TargetLowering::SimplifyDemandedBits(
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
}
+ SDNodeFlags Flags = Op->getFlags();
APInt InDemandedBits = DemandedBits.trunc(InBits);
APInt InDemandedElts = DemandedElts.zext(InElts);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
- Depth + 1))
+ Depth + 1)) {
+ if (Flags.hasNonNeg()) {
+ Flags.setNonNeg(false);
+ Op->setFlags(Flags);
+ }
return true;
+ }
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
Known = Known.zext(BitWidth);
@@ -2653,7 +2777,7 @@ bool TargetLowering::SimplifyDemandedBits(
// neg x with only low bit demanded is simply x.
if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
- isa<ConstantSDNode>(Op0) && cast<ConstantSDNode>(Op0)->isZero())
+ isNullConstant(Op0))
return TLO.CombineTo(Op, Op1);
// Attempt to avoid multi-use ops if we don't need anything from them.
@@ -2913,8 +3037,9 @@ bool TargetLowering::SimplifyDemandedVectorElts(
SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
TLO.DAG, Depth + 1);
if (NewOp0 || NewOp1) {
- SDValue NewOp = TLO.DAG.getNode(
- Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
+ SDValue NewOp =
+ TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
+ NewOp1 ? NewOp1 : Op1, Op->getFlags());
return TLO.CombineTo(Op, NewOp);
}
return false;
@@ -3823,8 +3948,12 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
return SDValue();
}
+ // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
+ // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
+  // it's liable to create an infinite loop.
SDValue Zero = DAG.getConstant(0, DL, OpVT);
- if (DAG.isKnownToBeAPowerOfTwo(Y)) {
+ if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
+ DAG.isKnownToBeAPowerOfTwo(Y)) {
// Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
// Note that where Y is variable and is known to have at most one bit set
// (for example, if it is Z & 1) we cannot do this; the expressions are not
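
To make the guarded simplification concrete, a quick exhaustive check (standalone C++, not from the patch) that the two forms agree whenever Y has exactly one bit set:

    #include <cassert>
    #include <cstdint>

    // When y is a power of two, x & y is either 0 or y, so the equality
    // against y and the inequality against 0 are the same predicate.
    int main() {
      for (uint32_t x = 0; x < 256; ++x)
        for (unsigned b = 0; b < 8; ++b) {
          uint32_t y = 1u << b;
          assert(((x & y) == y) == ((x & y) != 0));
        }
    }
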
@@ -3843,8 +3972,7 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
// Bail out if the compare operand that we want to turn into a zero is
// already a zero (otherwise, infinite loop).
- auto *YConst = dyn_cast<ConstantSDNode>(Y);
- if (YConst && YConst->isZero())
+ if (isNullConstant(Y))
return SDValue();
// Transform this into: ~X & Y == 0.
@@ -4088,8 +4216,8 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
// (ctpop x) u< 2 -> (x & x-1) == 0
// (ctpop x) u> 1 -> (x & x-1) != 0
if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
- // Keep the CTPOP if it is a legal vector op.
- if (CTVT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
+ // Keep the CTPOP if it is a cheap vector op.
+ if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
return SDValue();
unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
@@ -4110,28 +4238,32 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
}
- // Expand a power-of-2 comparison based on ctpop:
- // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
- // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
+ // Expand a power-of-2 comparison based on ctpop
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
- // Keep the CTPOP if it is legal.
- if (TLI.isOperationLegal(ISD::CTPOP, CTVT))
+ // Keep the CTPOP if it is cheap.
+ if (TLI.isCtpopFast(CTVT))
return SDValue();
SDValue Zero = DAG.getConstant(0, dl, CTVT);
SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
assert(CTVT.isInteger());
- ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
- SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
- SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
+
    // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
- // check before the emit a potentially unnecessary op.
- if (DAG.isKnownNeverZero(CTOp))
+ // check before emitting a potentially unnecessary op.
+ if (DAG.isKnownNeverZero(CTOp)) {
+ // (ctpop x) == 1 --> (x & x-1) == 0
+ // (ctpop x) != 1 --> (x & x-1) != 0
+ SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
+ SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
return RHS;
- SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
- unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
- return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
+ }
+
+ // (ctpop x) == 1 --> (x ^ x-1) > x-1
+ // (ctpop x) != 1 --> (x ^ x-1) <= x-1
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
+ ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
+ return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
}
return SDValue();
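
The new tail of the expansion replaces the two-setcc form with a single unsigned compare. A sketch of why the identity holds, exhaustively verified for 16-bit values (standalone C++ using a GCC/Clang builtin as the popcount reference; not part of the patch):

    #include <cassert>
    #include <cstdint>

    // popcount(x) == 1  <=>  (x ^ (x - 1)) >u (x - 1).
    // x ^ (x - 1) is a mask of the bits from 0 through the lowest set bit
    // of x. For x = 2^k that mask strictly exceeds x - 1; for x == 0 the
    // two sides are equal (wraparound), and for multi-bit x the mask drops
    // the high bits, so the compare fails.
    bool isPow2ViaXor(uint16_t x) {
      uint16_t m = x - 1; // unsigned wrap for x == 0 is intended
      return (uint16_t)(x ^ m) > m;
    }

    int main() {
      for (uint32_t x = 0; x <= 0xFFFF; ++x)
        assert(isPow2ViaXor((uint16_t)x) == (__builtin_popcount(x) == 1));
    }
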
@@ -4477,8 +4609,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
SDValue Ptr = Lod->getBasePtr();
if (bestOffset != 0)
- Ptr =
- DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(bestOffset),
+ dl);
SDValue NewLoad =
DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
Lod->getPointerInfo().getWithOffset(bestOffset),
@@ -4983,6 +5115,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
+ // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
+ if (isOperationLegalOrCustom(ISD::IS_FPCLASS, N0.getValueType()) &&
+ !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
+ bool IsFabs = N0.getOpcode() == ISD::FABS;
+ SDValue Op = IsFabs ? N0.getOperand(0) : N0;
+ if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
+ FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
+ : (IsFabs ? fcInf : fcPosInf);
+ if (Cond == ISD::SETUEQ)
+ Flag |= fcNan;
+ return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
+ DAG.getTargetConstant(Flag, dl, MVT::i32));
+ }
+ }
+
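
In scalar terms the new fold turns an FP equality against an infinity constant into a class test. A rough C++ rendering of the cases (standalone sketch, not from the patch; the <cmath> predicates stand in for the fcPosInf/fcInf/fcNan test bits):

    #include <cmath>

    // x == +inf (SETOEQ, no fabs): only the positive-infinity class.
    bool oeqPosInf(float x) { return std::isinf(x) && !std::signbit(x); }
    // fabs(x) == inf: either infinity, i.e. fcInf.
    bool oeqFabsInf(float x) { return std::isinf(x); }
    // SETUEQ additionally admits NaN, i.e. Flag |= fcNan in the patch.
    bool ueqPosInf(float x) { return oeqPosInf(x) || std::isnan(x); }
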
// If the condition is not legal, see if we can find an equivalent one
// which is legal.
if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
@@ -5037,7 +5184,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (isBitwiseNot(N1))
return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
- if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(0))) {
SDValue Not = DAG.getNOT(dl, N1, OpVT);
return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
}
@@ -5297,11 +5445,12 @@ SDValue TargetLowering::LowerAsmOutputForConstraint(
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
+ StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
- if (Constraint.length() > 1) return;
+ if (Constraint.size() > 1)
+ return;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
@@ -5620,20 +5769,27 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
return ConstraintOperands;
}
-/// Return an integer indicating how general CT is.
-static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+/// Return a number indicating our preference for choosing a type of constraint
+/// over another, for the purpose of sorting them. Immediates are almost always
+/// preferable (when they can be emitted). A higher return value means a
+/// stronger preference for one constraint type relative to another.
+/// FIXME: We should prefer registers over memory but doing so may lead to
+/// unrecoverable register exhaustion later.
+/// https://github.com/llvm/llvm-project/issues/20571
+static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
switch (CT) {
case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
- case TargetLowering::C_Unknown:
- return 0;
- case TargetLowering::C_Register:
- return 1;
- case TargetLowering::C_RegisterClass:
- return 2;
+ return 4;
case TargetLowering::C_Memory:
case TargetLowering::C_Address:
return 3;
+ case TargetLowering::C_RegisterClass:
+ return 2;
+ case TargetLowering::C_Register:
+ return 1;
+ case TargetLowering::C_Unknown:
+ return 0;
}
llvm_unreachable("Invalid constraint type");
}
@@ -5713,11 +5869,15 @@ TargetLowering::ConstraintWeight
/// If there are multiple different constraints that we could pick for this
/// operand (e.g. "imr") try to pick the 'best' one.
-/// This is somewhat tricky: constraints fall into four classes:
-/// Other -> immediates and magic values
+/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
+/// into seven classes:
/// Register -> one specific register
/// RegisterClass -> a group of regs
/// Memory -> memory
+/// Address -> a symbolic memory reference
+/// Immediate -> immediate values
+/// Other -> magic values (such as "Flag Output Operands")
+/// Unknown -> something we don't recognize yet and can't handle
/// Ideally, we would pick the most specific constraint possible: if we have
/// something that fits into a register, we would pick it. The problem here
/// is that if we have something that could either be in a register or in
@@ -5731,18 +5891,13 @@ TargetLowering::ConstraintWeight
/// 2) Otherwise, pick the most general constraint present. This prefers
/// 'm' over 'r', for example.
///
-static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
- const TargetLowering &TLI,
- SDValue Op, SelectionDAG *DAG) {
- assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
- unsigned BestIdx = 0;
- TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
- int BestGenerality = -1;
+TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
+ TargetLowering::AsmOperandInfo &OpInfo) const {
+ ConstraintGroup Ret;
- // Loop over the options, keeping track of the most general one.
- for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
- TargetLowering::ConstraintType CType =
- TLI.getConstraintType(OpInfo.Codes[i]);
+ Ret.reserve(OpInfo.Codes.size());
+ for (StringRef Code : OpInfo.Codes) {
+ TargetLowering::ConstraintType CType = getConstraintType(Code);
// Indirect 'other' or 'immediate' constraints are not allowed.
if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
@@ -5750,40 +5905,38 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
CType == TargetLowering::C_RegisterClass))
continue;
- // If this is an 'other' or 'immediate' constraint, see if the operand is
- // valid for it. For example, on X86 we might have an 'rI' constraint. If
- // the operand is an integer in the range [0..31] we want to use I (saving a
- // load of a register), otherwise we must use 'r'.
- if ((CType == TargetLowering::C_Other ||
- CType == TargetLowering::C_Immediate) && Op.getNode()) {
- assert(OpInfo.Codes[i].size() == 1 &&
- "Unhandled multi-letter 'other' constraint");
- std::vector<SDValue> ResultOps;
- TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
- ResultOps, *DAG);
- if (!ResultOps.empty()) {
- BestType = CType;
- BestIdx = i;
- break;
- }
- }
-
// Things with matching constraints can only be registers, per gcc
// documentation. This mainly affects "g" constraints.
if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
continue;
- // This constraint letter is more general than the previous one, use it.
- int Generality = getConstraintGenerality(CType);
- if (Generality > BestGenerality) {
- BestType = CType;
- BestIdx = i;
- BestGenerality = Generality;
- }
+ Ret.emplace_back(Code, CType);
}
- OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
- OpInfo.ConstraintType = BestType;
+ std::stable_sort(
+ Ret.begin(), Ret.end(), [](ConstraintPair a, ConstraintPair b) {
+ return getConstraintPiority(a.second) > getConstraintPiority(b.second);
+ });
+
+ return Ret;
+}
+
+/// If we have an immediate, see if we can lower it. Return true if we can,
+/// false otherwise.
+static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
+ SDValue Op, SelectionDAG *DAG,
+ const TargetLowering &TLI) {
+
+ assert((P.second == TargetLowering::C_Other ||
+ P.second == TargetLowering::C_Immediate) &&
+ "need immediate or other");
+
+ if (!Op.getNode())
+ return false;
+
+ std::vector<SDValue> ResultOps;
+ TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
+ return !ResultOps.empty();
}
/// Determines the constraint code and constraint type to use for the specific
@@ -5798,7 +5951,26 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
OpInfo.ConstraintCode = OpInfo.Codes[0];
OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
} else {
- ChooseConstraint(OpInfo, *this, Op, DAG);
+ ConstraintGroup G = getConstraintPreferences(OpInfo);
+ if (G.empty())
+ return;
+
+ unsigned BestIdx = 0;
+ for (const unsigned E = G.size();
+ BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
+ G[BestIdx].second == TargetLowering::C_Immediate);
+ ++BestIdx) {
+ if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
+ break;
+ // If no immediate alternative can be lowered and nothing else is
+ // left, fall back to the first (highest-priority) constraint.
+ if (BestIdx + 1 == E) {
+ BestIdx = 0;
+ break;
+ }
+ }
+
+ OpInfo.ConstraintCode = G[BestIdx].first;
+ OpInfo.ConstraintType = G[BestIdx].second;
}
// 'X' matches anything.
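
Putting the pieces together, a toy model of the new selection scheme for a multi-alternative constraint string such as "imr" (standalone C++; every name here is hypothetical, not LLVM API): sort the alternatives by priority, walk the immediate-like prefix trying to lower the operand, and fall back otherwise.

    #include <algorithm>
    #include <cstddef>
    #include <optional>
    #include <string>

    enum Kind { Unknown = 0, Reg = 1, RegClass = 2, Mem = 3, Imm = 4 };

    Kind classify(char c) {
      switch (c) {
      case 'i': return Imm;
      case 'm': return Mem;
      case 'r': return RegClass;
      default:  return Unknown;
      }
    }

    char choose(std::string codes, std::optional<long> imm) {
      std::stable_sort(codes.begin(), codes.end(),
                       [](char a, char b) { return classify(a) > classify(b); });
      for (std::size_t i = 0; i != codes.size(); ++i) {
        if (classify(codes[i]) != Imm)
          return codes[i];          // first non-immediate alternative wins
        if (imm)                    // stands in for lowerImmediateIfPossible
          return codes[i];
      }
      return codes[0];              // only immediates, none lowered: pick first
    }
    // choose("imr", 42)            -> 'i' (the immediate can be emitted)
    // choose("imr", std::nullopt)  -> 'm' (memory outranks registers; see FIXME)
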
@@ -5914,6 +6086,49 @@ TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
return SDValue();
}
+/// Build sdiv by a power of 2 with conditional move instructions.
+/// Ref: "Hacker's Delight" by Henry Warren, section 10-1.
+/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
+/// bgez x, label
+/// add x, x, 2**k-1
+/// label:
+/// sra res, x, k
+/// neg res, res (when the divisor is negative)
+SDValue TargetLowering::buildSDIVPow2WithCMov(
+ SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
+ unsigned Lg2 = Divisor.countr_zero();
+ EVT VT = N->getValueType(0);
+
+ SDLoc DL(N);
+ SDValue N0 = N->getOperand(0);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
+ SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
+
+ // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+ SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+
+ Created.push_back(Cmp.getNode());
+ Created.push_back(Add.getNode());
+ Created.push_back(CMov.getNode());
+
+ // Divide by pow2.
+ SDValue SRA =
+ DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
+
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (Divisor.isNonNegative())
+ return SRA;
+
+ Created.push_back(SRA.getNode());
+ return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
+}
+
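
A scalar model of this lowering (standalone C++, not from the patch; it assumes arithmetic right shift on negative ints, which is guaranteed since C++20), checked against C++'s truncating division:

    #include <cassert>

    // sdiv x, +/-2^k rounds toward zero, so negative x needs the bias
    // (2^k - 1) added before the arithmetic shift.
    int sdivPow2(int x, unsigned k, bool negDivisor) {
      int bias = (1 << k) - 1;
      int sel = x < 0 ? x + bias : x;  // the SELECT/CMov in the patch
      int res = sel >> k;              // the SRA
      return negDivisor ? -res : res;  // 0 - SRA for a negative divisor
    }

    int main() {
      for (int x = -1000; x <= 1000; ++x)
        for (unsigned k = 1; k <= 4; ++k) {
          assert(sdivPow2(x, k, false) == x / (1 << k));
          assert(sdivPow2(x, k, true) == x / -(1 << k));
        }
    }
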
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
@@ -6016,7 +6231,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
// Multiply the numerator (operand 0) by the magic value.
// FIXME: We should support doing a MUL in a wider type.
auto GetMULHS = [&](SDValue X, SDValue Y) {
- // If the type isn't legal, use a wider mul of the the type calculated
+ // If the type isn't legal, use a wider mul of the type calculated
// earlier.
if (!isTypeLegal(VT)) {
X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
@@ -6203,7 +6418,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
// FIXME: We should support doing a MUL in a wider type.
auto GetMULHU = [&](SDValue X, SDValue Y) {
- // If the type isn't legal, use a wider mul of the the type calculated
+ // If the type isn't legal, use a wider mul of the type calculated
// earlier.
if (!isTypeLegal(VT)) {
X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
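
For context, the "magic number" technique these helpers implement, in its simplest concrete form (standalone C++ sketch, not from the patch): unsigned divide by 3 becomes a 64-bit multiply and a shift, since (2^33 + 1) / 3 = 0xAAAAAAAB.

    #include <cassert>
    #include <cstdint>

    // x / 3 == (x * 0xAAAAAAAB) >> 33 for every uint32_t x; the product
    // fits in 64 bits, so no wider type is needed here.
    uint32_t udiv3(uint32_t x) {
      return (uint32_t)((x * 0xAAAAAAABull) >> 33);
    }

    int main() {
      for (uint64_t x = 0; x <= 0xFFFFFFFFull; x += 0x10001) // sampled check
        assert(udiv3((uint32_t)x) == (uint32_t)x / 3);
    }
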
@@ -9131,7 +9346,7 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SrcEltVT, LD->getOriginalAlign(),
LD->getMemOperand()->getFlags(), LD->getAAInfo());
- BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::Fixed(Stride));
+ BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
Vals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
@@ -9206,7 +9421,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
DAG.getVectorIdxConstant(Idx, SL));
SDValue Ptr =
- DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Idx * Stride));
+ DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
// This scalar TruncStore may be illegal, but we legalize it later.
SDValue Store = DAG.getTruncStore(
@@ -9342,7 +9557,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
LD->getAAInfo());
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
@@ -9352,7 +9567,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
LD->getAAInfo());
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
@@ -9477,6 +9692,14 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
SDValue ShiftAmount = DAG.getConstant(
NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
SDValue Lo = Val;
+ // If Val is a constant, replace the upper bits with 0. The SRL will constant
+ // fold and not use the upper bits. A smaller constant may be easier to
+ // materialize.
+ if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
+ Lo = DAG.getNode(
+ ISD::AND, dl, VT, Lo,
+ DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
+ VT));
SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
// Store the two parts
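
The effect of the new masking, modeled on a scalar split of a 32-bit store into two 16-bit halves (standalone C++ sketch; little-endian order assumed, where the patch itself switches on the data layout):

    #include <cstdint>

    // Masking the low half lets a constant Val rematerialize as a small
    // immediate instead of the full 32-bit constant; the high half comes
    // from the SRL either way.
    void splitStore(uint16_t *p, uint32_t val) {
      uint16_t lo = (uint16_t)(val & 0xFFFF); // masked low part (the new AND)
      uint16_t hi = (uint16_t)(val >> 16);    // SRL part
      p[0] = lo;
      p[1] = hi;
    }
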
@@ -9486,7 +9709,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
ST->getMemOperand()->getFlags());
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
Store2 = DAG.getTruncStore(
Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
@@ -9618,7 +9841,7 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
  // Access to address of TLS variable xyz is lowered to a function call:
// __emutls_get_address( address of global variable named "__emutls_v.xyz" )
EVT PtrVT = getPointerTy(DAG.getDataLayout());
- PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
+ PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
SDLoc dl(GA);
ArgListTy Args;
@@ -9657,20 +9880,18 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
return SDValue();
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDLoc dl(Op);
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- if (C->isZero() && CC == ISD::SETEQ) {
- EVT VT = Op.getOperand(0).getValueType();
- SDValue Zext = Op.getOperand(0);
- if (VT.bitsLT(MVT::i32)) {
- VT = MVT::i32;
- Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
- }
- unsigned Log2b = Log2_32(VT.getSizeInBits());
- SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
- SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
- DAG.getConstant(Log2b, dl, MVT::i32));
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
+ if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
+ EVT VT = Op.getOperand(0).getValueType();
+ SDValue Zext = Op.getOperand(0);
+ if (VT.bitsLT(MVT::i32)) {
+ VT = MVT::i32;
+ Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
}
+ unsigned Log2b = Log2_32(VT.getSizeInBits());
+ SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
+ SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
+ DAG.getConstant(Log2b, dl, MVT::i32));
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
}
return SDValue();
}
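
The scalar content of this transform, for 32-bit values (standalone C++20 sketch, not from the patch; std::countl_zero defines clz(0) == 32):

    #include <bit>
    #include <cstdint>

    // clz(0) == 32 == 0b100000, while clz of any nonzero value is at most
    // 31, so bit 5 of clz(x), extracted by the SRL, is exactly (x == 0).
    bool isZeroViaClz(uint32_t x) {
      return std::countl_zero(x) >> 5;
    }
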
@@ -10489,9 +10710,9 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
}
- // We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as
+ // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
// libcall emission cannot handle this. Large result types will fail.
- if (SrcVT == MVT::f16) {
+ if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
SrcVT = Src.getValueType();
}