author    Dimitry Andric <dim@FreeBSD.org>  2021-02-16 20:13:02 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2021-02-16 20:13:02 +0000
commit    b60736ec1405bb0a8dd40989f67ef4c93da068ab (patch)
tree      5c43fbb7c9fc45f0f87e0e6795a86267dbd12f9d /llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
parent    cfca06d7963fa0909f90483b42a6d7d194d01e08 (diff)
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp  2383
1 file changed, 1488 insertions, 895 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f14b3dba4f31..615bea2a4905 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -24,12 +24,14 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -410,9 +412,11 @@ namespace {
SDValue visitSUBO(SDNode *N);
SDValue visitADDE(SDNode *N);
SDValue visitADDCARRY(SDNode *N);
+ SDValue visitSADDO_CARRY(SDNode *N);
SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
SDValue visitSUBE(SDNode *N);
SDValue visitSUBCARRY(SDNode *N);
+ SDValue visitSSUBO_CARRY(SDNode *N);
SDValue visitMUL(SDNode *N);
SDValue visitMULFIX(SDNode *N);
SDValue useDivRem(SDNode *N);
@@ -464,6 +468,7 @@ namespace {
SDValue visitFREEZE(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
SDValue visitFADD(SDNode *N);
+ SDValue visitSTRICT_FADD(SDNode *N);
SDValue visitFSUB(SDNode *N);
SDValue visitFMUL(SDNode *N);
SDValue visitFMA(SDNode *N);
@@ -539,6 +544,7 @@ namespace {
SDValue convertSelectOfFPConstantsToLoadOffset(
const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
ISD::CondCode CC);
+ SDValue foldSignChangeInBitcast(SDNode *N);
SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC);
SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
@@ -586,7 +592,7 @@ namespace {
const SDLoc &DL);
SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
SDValue MatchLoadCombine(SDNode *N);
- SDValue MatchStoreCombine(StoreSDNode *N);
+ SDValue mergeTruncStores(StoreSDNode *N);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue splitMergedValStore(StoreSDNode *ST);
@@ -641,14 +647,18 @@ namespace {
// Classify the origin of a stored value.
enum class StoreSource { Unknown, Constant, Extract, Load };
StoreSource getStoreSource(SDValue StoreVal) {
- if (isa<ConstantSDNode>(StoreVal) || isa<ConstantFPSDNode>(StoreVal))
+ switch (StoreVal.getOpcode()) {
+ case ISD::Constant:
+ case ISD::ConstantFP:
return StoreSource::Constant;
- if (StoreVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
- StoreVal.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::EXTRACT_SUBVECTOR:
return StoreSource::Extract;
- if (isa<LoadSDNode>(StoreVal))
+ case ISD::LOAD:
return StoreSource::Load;
- return StoreSource::Unknown;
+ default:
+ return StoreSource::Unknown;
+ }
}
/// This is a helper function for visitMUL to check the profitability
@@ -752,9 +762,7 @@ namespace {
/// is legal or custom before legalizing operations, and whether it is
/// legal (but not custom) after legalization.
bool hasOperation(unsigned Opcode, EVT VT) {
- if (LegalOperations)
- return TLI.isOperationLegal(Opcode, VT);
- return TLI.isOperationLegalOrCustom(Opcode, VT);
+ return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
}
public:
@@ -924,23 +932,40 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const {
return false;
}
-// Returns the SDNode if it is a constant float BuildVector
-// or constant float.
-static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
- if (isa<ConstantFPSDNode>(N))
- return N.getNode();
- if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
- return N.getNode();
- return nullptr;
+static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
+ if (!ScalarTy.isSimple())
+ return false;
+
+ uint64_t MaskForTy = 0ULL;
+ switch (ScalarTy.getSimpleVT().SimpleTy) {
+ case MVT::i8:
+ MaskForTy = 0xFFULL;
+ break;
+ case MVT::i16:
+ MaskForTy = 0xFFFFULL;
+ break;
+ case MVT::i32:
+ MaskForTy = 0xFFFFFFFFULL;
+ break;
+ default:
+ return false;
+ break;
+ }
+
+ APInt Val;
+ if (ISD::isConstantSplatVector(N, Val))
+ return Val.getLimitedValue() == MaskForTy;
+
+ return false;
}
-// Determines if it is a constant integer or a build vector of constant
+// Determines if it is a constant integer or a splat/build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
return !(Const->isOpaque() && NoOpaques);
- if (N.getOpcode() != ISD::BUILD_VECTOR)
+ if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
return false;
unsigned BitWidth = N.getScalarValueSizeInBits();
for (const SDValue &Op : N->op_values()) {
@@ -1554,9 +1579,15 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
DAG.ReplaceAllUsesWith(N, &RV);
}
- // Push the new node and any users onto the worklist
- AddToWorklist(RV.getNode());
- AddUsersToWorklist(RV.getNode());
+ // Push the new node and any users onto the worklist. Omit this if the
+ // new node is the EntryToken (e.g. if a store managed to get optimized
+ // out), because re-visiting the EntryToken and its users will not uncover
+ // any additional opportunities, but there may be a large number of such
+ // users, potentially causing compile time explosion.
+ if (RV.getOpcode() != ISD::EntryToken) {
+ AddToWorklist(RV.getNode());
+ AddUsersToWorklist(RV.getNode());
+ }
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
@@ -1589,8 +1620,10 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::USUBO: return visitSUBO(N);
case ISD::ADDE: return visitADDE(N);
case ISD::ADDCARRY: return visitADDCARRY(N);
+ case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
case ISD::SUBE: return visitSUBE(N);
case ISD::SUBCARRY: return visitSUBCARRY(N);
+ case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
@@ -1646,6 +1679,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::BITCAST: return visitBITCAST(N);
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
case ISD::FADD: return visitFADD(N);
+ case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
case ISD::FSUB: return visitFSUB(N);
case ISD::FMUL: return visitFMUL(N);
case ISD::FMA: return visitFMA(N);
@@ -1805,6 +1839,10 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
if (OptLevel == CodeGenOpt::None)
return SDValue();
+ // Don't simplify the token factor if the node itself has too many operands.
+ if (N->getNumOperands() > TokenFactorInlineLimit)
+ return SDValue();
+
// If the sole user is a token factor, we should make sure we have a
// chance to merge them together. This prevents TF chains from inhibiting
// optimizations.
@@ -1890,7 +1928,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
// If this is an Op, we can remove the op from the list. Remark any
// search associated with it as from the current OpNumber.
- if (SeenOps.count(Op) != 0) {
+ if (SeenOps.contains(Op)) {
Changed = true;
DidPruneOps = true;
unsigned OrigOpNumber = 0;
@@ -2002,6 +2040,62 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
}
+/// Return true if 'Use' is a load or a store that uses N as its base pointer
+/// and that N may be folded in the load / store addressing mode.
+static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT;
+ unsigned AS;
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
+ if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+ return false;
+ VT = LD->getMemoryVT();
+ AS = LD->getAddressSpace();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
+ if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+ return false;
+ VT = ST->getMemoryVT();
+ AS = ST->getAddressSpace();
+ } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
+ if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+ return false;
+ VT = LD->getMemoryVT();
+ AS = LD->getAddressSpace();
+ } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
+ if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+ return false;
+ VT = ST->getMemoryVT();
+ AS = ST->getAddressSpace();
+ } else
+ return false;
+
+ TargetLowering::AddrMode AM;
+ if (N->getOpcode() == ISD::ADD) {
+ AM.HasBaseReg = true;
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else if (N->getOpcode() == ISD::SUB) {
+ AM.HasBaseReg = true;
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = -Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else
+ return false;
+
+ return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
+ VT.getTypeForEVT(*DAG.getContext()), AS);
+}
+
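The helper above asks the target whether an ADD/SUB feeding a load or store pointer can be folded into the access as [reg +/- imm] or [reg +/- reg]. A minimal standalone C++ sketch of the two address shapes in question (plain pointer arithmetic, not the DAG or TargetLowering API; whether either form actually folds is target-specific):

    #include <cassert>
    #include <cstdint>

    // [reg + imm]: constant byte offset that may fold into the access.
    static int32_t load_reg_imm(const int32_t *base) { return base[4]; }
    // [reg + reg]: register index that may fold into the access
    // (scaled by the element size here).
    static int32_t load_reg_reg(const int32_t *base, int64_t i) { return base[i]; }

    int main() {
      int32_t buf[8] = {0, 1, 2, 3, 4, 5, 6, 7};
      assert(load_reg_imm(buf) == 4);
      assert(load_reg_reg(buf, 6) == 6);
      return 0;
    }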
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
"Unexpected binary operator");
@@ -2021,12 +2115,12 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
SDValue CT = Sel.getOperand(1);
if (!isConstantOrConstantVector(CT, true) &&
- !isConstantFPBuildVectorOrConstantFP(CT))
+ !DAG.isConstantFPBuildVectorOrConstantFP(CT))
return SDValue();
SDValue CF = Sel.getOperand(2);
if (!isConstantOrConstantVector(CF, true) &&
- !isConstantFPBuildVectorOrConstantFP(CF))
+ !DAG.isConstantFPBuildVectorOrConstantFP(CF))
return SDValue();
// Bail out if any constants are opaque because we can't constant fold those.
@@ -2043,19 +2137,10 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
SDValue CBO = BO->getOperand(SelOpNo ^ 1);
if (!CanFoldNonConst &&
!isConstantOrConstantVector(CBO, true) &&
- !isConstantFPBuildVectorOrConstantFP(CBO))
+ !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
return SDValue();
- EVT VT = Sel.getValueType();
-
- // In case of shift value and shift amount may have different VT. For instance
- // on x86 shift amount is i8 regardles of LHS type. Bail out if we have
- // swapped operands and value types do not match. NB: x86 is fine if operands
- // are not swapped with shift amount VT being not bigger than shifted value.
- // TODO: that is possible to check for a shift operation, correct VTs and
- // still perform optimization on x86 if needed.
- if (SelOpNo && VT != CBO.getValueType())
- return SDValue();
+ EVT VT = BO->getValueType(0);
// We have a select-of-constants followed by a binary operator with a
// constant. Eliminate the binop by pulling the constant math into the select.
@@ -2065,14 +2150,14 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
: DAG.getNode(BinOpcode, DL, VT, CT, CBO);
if (!CanFoldNonConst && !NewCT.isUndef() &&
!isConstantOrConstantVector(NewCT, true) &&
- !isConstantFPBuildVectorOrConstantFP(NewCT))
+ !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
return SDValue();
SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
: DAG.getNode(BinOpcode, DL, VT, CF, CBO);
if (!CanFoldNonConst && !NewCF.isUndef() &&
!isConstantOrConstantVector(NewCF, true) &&
- !isConstantFPBuildVectorOrConstantFP(NewCF))
+ !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
return SDValue();
SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
@@ -2402,8 +2487,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
// Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
- APInt C0 = N0->getConstantOperandAPInt(0);
- APInt C1 = N1->getConstantOperandAPInt(0);
+ const APInt &C0 = N0->getConstantOperandAPInt(0);
+ const APInt &C1 = N1->getConstantOperandAPInt(0);
return DAG.getVScale(DL, VT, C0 + C1);
}
@@ -2411,9 +2496,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if ((N0.getOpcode() == ISD::ADD) &&
(N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
(N1.getOpcode() == ISD::VSCALE)) {
- auto VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
- auto VS1 = N1->getConstantOperandAPInt(0);
- auto VS = DAG.getVScale(DL, VT, VS0 + VS1);
+ const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
+ const APInt &VS1 = N1->getConstantOperandAPInt(0);
+ SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
}
@@ -2631,36 +2716,18 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
return SDValue();
}
-static SDValue flipBoolean(SDValue V, const SDLoc &DL,
- SelectionDAG &DAG, const TargetLowering &TLI) {
- EVT VT = V.getValueType();
-
- SDValue Cst;
- switch (TLI.getBooleanContents(VT)) {
- case TargetLowering::ZeroOrOneBooleanContent:
- case TargetLowering::UndefinedBooleanContent:
- Cst = DAG.getConstant(1, DL, VT);
- break;
- case TargetLowering::ZeroOrNegativeOneBooleanContent:
- Cst = DAG.getAllOnesConstant(DL, VT);
- break;
- }
-
- return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
-}
-
/**
* Flips a boolean if it is cheaper to compute. If the Force parameter is set,
* then the flip also occurs if computing the inverse is the same cost.
* This function returns an empty SDValue in case it cannot flip the boolean
* without increasing the cost of the computation. If you want to flip a boolean
- * no matter what, use flipBoolean.
+ * no matter what, use DAG.getLogicalNOT.
*/
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
const TargetLowering &TLI,
bool Force) {
if (Force && isa<ConstantSDNode>(V))
- return flipBoolean(V, SDLoc(V), DAG, TLI);
+ return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
if (V.getOpcode() != ISD::XOR)
return SDValue();
@@ -2687,7 +2754,7 @@ static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
if (IsFlip)
return V.getOperand(0);
if (Force)
- return flipBoolean(V, SDLoc(V), DAG, TLI);
+ return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
return SDValue();
}
@@ -2724,8 +2791,8 @@ SDValue DAGCombiner::visitADDO(SDNode *N) {
if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
DAG.getConstant(0, DL, VT), N0.getOperand(0));
- return CombineTo(N, Sub,
- flipBoolean(Sub.getValue(1), DL, DAG, TLI));
+ return CombineTo(
+ N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
}
if (SDValue Combined = visitUADDOLike(N0, N1, N))
@@ -2820,6 +2887,28 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+ SDLoc DL(N);
+
+ // canonicalize constant to RHS
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
+
+ // fold (saddo_carry x, y, false) -> (saddo x, y)
+ if (isNullConstant(CarryIn)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
+ return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
+ }
+
+ return SDValue();
+}
+
/**
* If we are facing some sort of diamond carry propagation pattern, try to
* break it up to generate something like:
@@ -3005,8 +3094,8 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
SDLoc DL(N);
SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
N0.getOperand(0), NotC);
- return CombineTo(N, Sub,
- flipBoolean(Sub.getValue(1), DL, DAG, TLI));
+ return CombineTo(
+ N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
}
// Iff the flag result is dead:
@@ -3111,6 +3200,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// 0 - X --> X if X is 0 or the minimum signed value.
return N1;
}
+
+ // Convert 0 - abs(x).
+ SDValue Result;
+ if (N1->getOpcode() == ISD::ABS &&
+ !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
+ TLI.expandABS(N1.getNode(), Result, DAG, true))
+ return Result;
}
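The new fold lets "0 - abs(x)" be formed even when an ABS node is not legal. A small standalone check of the scalar identity involved, assuming x does not overflow (the actual sequence is whatever TLI.expandABS emits):

    #include <algorithm>
    #include <cassert>

    int main() {
      for (int x : {-7, 0, 7}) {
        // 0 - abs(x) without an ABS operation, for non-overflowing x.
        int neg_abs = std::min(x, 0 - x);
        assert(neg_abs == -(x < 0 ? -x : x));
      }
      return 0;
    }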
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
@@ -3306,12 +3402,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
SDValue S0 = N1.getOperand(0);
- if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
- unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
- if (C->getAPIntValue() == (OpSizeInBits - 1))
+ if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
- }
}
}
@@ -3342,7 +3436,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
if (N1.getOpcode() == ISD::VSCALE) {
- APInt IntVal = N1.getConstantOperandAPInt(0);
+ const APInt &IntVal = N1.getConstantOperandAPInt(0);
return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
}
@@ -3501,6 +3595,21 @@ SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+
+ // fold (ssubo_carry x, y, false) -> (ssubo x, y)
+ if (isNullConstant(CarryIn)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
+ return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
+ }
+
+ return SDValue();
+}
+
// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
// UMULFIXSAT here.
SDValue DAGCombiner::visitMULFIX(SDNode *N) {
@@ -3606,19 +3715,30 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
getShiftAmountTy(N0.getValueType()))));
}
- // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
+ // Try to transform:
+ // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
// mul x, (2^N + 1) --> add (shl x, N), x
// mul x, (2^N - 1) --> sub (shl x, N), x
// Examples: x * 33 --> (x << 5) + x
// x * 15 --> (x << 4) - x
// x * -33 --> -((x << 5) + x)
// x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
+ // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
+ // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
+ // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
+ // Examples: x * 0x8800 --> (x << 15) + (x << 11)
+ // x * 0xf800 --> (x << 16) - (x << 11)
+ // x * -0x8800 --> -((x << 15) + (x << 11))
+ // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
// TODO: We could handle more general decomposition of any constant by
// having the target set a limit on number of ops and making a
// callback to determine that sequence (similar to sqrt expansion).
unsigned MathOp = ISD::DELETED_NODE;
APInt MulC = ConstValue1.abs();
+ // The constant `2` should be treated as (2^0 + 1).
+ unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
+ MulC.lshrInPlace(TZeros);
if ((MulC - 1).isPowerOf2())
MathOp = ISD::ADD;
else if ((MulC + 1).isPowerOf2())
@@ -3627,12 +3747,17 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (MathOp != ISD::DELETED_NODE) {
unsigned ShAmt =
MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
+ ShAmt += TZeros;
assert(ShAmt < VT.getScalarSizeInBits() &&
"multiply-by-constant generated out of bounds shift");
SDLoc DL(N);
SDValue Shl =
DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
- SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
+ SDValue R =
+ TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
+ DAG.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(TZeros, DL, VT)))
+ : DAG.getNode(MathOp, DL, VT, Shl, N0);
if (ConstValue1.isNegative())
R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
return R;
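A standalone sanity check of the shift-and-add/sub identities listed in the comments above, written as plain C++ integer arithmetic rather than DAG nodes:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0x12345678u;
      // (1) mul x, (2^N + 1) --> add (shl x, N), x
      assert(x * 33u == (x << 5) + x);
      //     mul x, (2^N - 1) --> sub (shl x, N), x
      assert(x * 15u == (x << 4) - x);
      // (2) mul x, (2^N + 2^M) --> add (shl x, N), (shl x, M)
      assert(x * 0x8800u == (x << 15) + (x << 11));
      //     mul x, (2^N - 2^M) --> sub (shl x, N), (shl x, M)
      assert(x * 0xf800u == (x << 16) - (x << 11));
      return 0;
    }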
@@ -3684,11 +3809,42 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
if (N0.getOpcode() == ISD::VSCALE)
if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
- APInt C0 = N0.getConstantOperandAPInt(0);
- APInt C1 = NC1->getAPIntValue();
+ const APInt &C0 = N0.getConstantOperandAPInt(0);
+ const APInt &C1 = NC1->getAPIntValue();
return DAG.getVScale(SDLoc(N), VT, C0 * C1);
}
+ // Fold (mul x, 0/undef) -> 0 and (mul x, 1) -> x
+ // into and(x, mask).
+ // We can replace vectors with '0' and '1' factors with a clearing mask.
+ if (VT.isFixedLengthVector()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallBitVector ClearMask;
+ ClearMask.reserve(NumElts);
+ auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
+ if (!V || V->isNullValue()) {
+ ClearMask.push_back(true);
+ return true;
+ }
+ ClearMask.push_back(false);
+ return V->isOne();
+ };
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
+ ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
+ assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
+ SDLoc DL(N);
+ EVT LegalSVT = N1.getOperand(0).getValueType();
+ SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
+ SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
+ for (unsigned I = 0; I != NumElts; ++I)
+ if (ClearMask[I])
+ Mask[I] = Zero;
+ return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
+ }
+ }
+
// reassociate mul
if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
return RMUL;
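The clearing-mask equivalence used above, restated on plain arrays standing in for fixed-length vectors (illustrative sketch only):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x[4]    = {0xDEADBEEFu, 0x12345678u, 0xFFFFFFFFu, 7u};
      uint32_t mul[4]  = {1u, 0u, 1u, 0u};   // per-lane factors of 1 and 0
      uint32_t mask[4] = {~0u, 0u, ~0u, 0u}; // all-ones where the factor is 1
      for (int i = 0; i < 4; ++i)
        assert(x[i] * mul[i] == (x[i] & mask[i]));
      return 0;
    }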
@@ -4108,9 +4264,9 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
} else {
- SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
if (DAG.isKnownToBeAPowerOfTwo(N1)) {
// fold (urem x, pow2) -> (and x, pow2-1)
+ SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0, Add);
@@ -4118,6 +4274,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (N1.getOpcode() == ISD::SHL &&
DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
// fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0, Add);
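A quick standalone check of the power-of-two unsigned-remainder folds described in the comments above:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0xCAFEBABEu;
      uint32_t pow2 = 64u;
      // fold (urem x, pow2) -> (and x, pow2-1)
      assert(x % pow2 == (x & (pow2 - 1u)));
      // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
      uint32_t y = 3u;
      assert(x % (pow2 << y) == (x & ((pow2 << y) - 1u)));
      return 0;
    }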
@@ -4186,7 +4343,8 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
// If the type twice as wide is legal, transform the mulhs to a wider multiply
// plus a shift.
- if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
+ if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
+ !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
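The widening expansion referenced here and in the matching mulhu change below ("a wider multiply plus a shift"), sketched as scalar C++ for i16; arithmetic right shift of a negative value is assumed (guaranteed since C++20 and the common behavior before that):

    #include <cassert>
    #include <cstdint>

    int main() {
      // mulhs i16: high 16 bits of the sign-extended 32-bit product.
      int16_t a = -12345, b = 321;
      int16_t mulhs = (int16_t)(((int32_t)a * (int32_t)b) >> 16);
      assert(mulhs == -61); // floor(-3962745 / 65536)

      // mulhu i16: high 16 bits of the zero-extended 32-bit product.
      uint16_t ua = 54321u, ub = 1234u;
      uint16_t mulhu = (uint16_t)(((uint32_t)ua * (uint32_t)ub) >> 16);
      assert(mulhu == 1022u); // 67032114 / 65536
      return 0;
    }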
@@ -4242,7 +4400,8 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
// If the type twice as wide is legal, transform the mulhu to a wider multiply
// plus a shift.
- if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
+ if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
+ !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
@@ -4448,6 +4607,10 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
}
+ // Simplify the operands using demanded-bits information.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -4916,8 +5079,15 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
if (!LDST->isSimple())
return false;
+ EVT LdStMemVT = LDST->getMemoryVT();
+
+ // Bail out when changing the scalable property, since we can't be sure that
+ // we're actually narrowing here.
+ if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
+ return false;
+
// Verify that we are actually reducing a load width here.
- if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
+ if (LdStMemVT.bitsLT(MemVT))
return false;
// Ensure that this isn't going to produce an unsupported memory access.
@@ -5272,6 +5442,31 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return N1;
if (ISD::isBuildVectorAllOnes(N1.getNode()))
return N0;
+
+ // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
+ auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
+ auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
+ if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
+ N0.hasOneUse() && N1.hasOneUse()) {
+ EVT LoadVT = MLoad->getMemoryVT();
+ EVT ExtVT = VT;
+ if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
+ // For this AND to be a zero extension of the masked load the elements
+ // of the BuildVec must mask the bottom bits of the extended element
+ // type
+ if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
+ uint64_t ElementSize =
+ LoadVT.getVectorElementType().getScalarSizeInBits();
+ if (Splat->getAPIntValue().isMask(ElementSize)) {
+ return DAG.getMaskedLoad(
+ ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
+ MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
+ LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
+ ISD::ZEXTLOAD, MLoad->isExpandingLoad());
+ }
+ }
+ }
+ }
}
// fold (and c1, c2) -> c1&c2
@@ -5440,6 +5635,28 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
+ // fold (and (masked_gather x)) -> (zext_masked_gather x)
+ if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
+ EVT MemVT = GN0->getMemoryVT();
+ EVT ScalarVT = MemVT.getScalarType();
+
+ if (SDValue(GN0, 0).hasOneUse() &&
+ isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
+ TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
+ SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
+ GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
+
+ SDValue ZExtLoad = DAG.getMaskedGather(
+ DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
+ GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
+
+ CombineTo(N, ZExtLoad);
+ AddToWorklist(ZExtLoad.getNode());
+ // Avoid recheck of N.
+ return SDValue(N, 0);
+ }
+ }
+
// fold (and (load x), 255) -> (zextload x, i8)
// fold (and (extload x, i16), 255) -> (zextload x, i8)
// fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
@@ -5534,6 +5751,31 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
return V;
+ // Recognize the following pattern:
+ //
+ // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
+ //
+ // where bitmask is a mask that clears the upper bits of AndVT. The
+ // number of bits in bitmask must be a power of two.
+ auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
+ if (LHS->getOpcode() != ISD::SIGN_EXTEND)
+ return false;
+
+ auto *C = dyn_cast<ConstantSDNode>(RHS);
+ if (!C)
+ return false;
+
+ if (!C->getAPIntValue().isMask(
+ LHS.getOperand(0).getValueType().getFixedSizeInBits()))
+ return false;
+
+ return true;
+ };
+
+ // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
+ if (IsAndZeroExtMask(N0, N1))
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
+
return SDValue();
}
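A scalar illustration of the pattern replaced above: ANDing a sign extension with the low-bit mask of the narrow type yields exactly the zero extension of that narrow value (plain C++, not the DAG API):

    #include <cassert>
    #include <cstdint>

    int main() {
      int8_t narrow = -5; // 0xFB
      // (and (sign_extend i8 x to i32), 0xFF) ...
      uint32_t and_of_sext = (uint32_t)(int32_t)narrow & 0xFFu;
      // ... equals (zero_extend i8 x to i32).
      uint32_t zext = (uint32_t)(uint8_t)narrow;
      assert(and_of_sext == zext && zext == 0xFBu);
      return 0;
    }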
@@ -6782,11 +7024,11 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
return None;
}
-static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
+static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
return i;
}
-static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
+static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
return BW - i - 1;
}
@@ -6803,8 +7045,8 @@ static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
bool BigEndian = true, LittleEndian = true;
for (unsigned i = 0; i < Width; i++) {
int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
- LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
- BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
+ LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
+ BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
if (!BigEndian && !LittleEndian)
return None;
}
@@ -6847,80 +7089,90 @@ static SDValue stripTruncAndExt(SDValue Value) {
/// p[3] = (val >> 0) & 0xFF;
/// =>
/// *((i32)p) = BSWAP(val);
-SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
+SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
+ // The matching looks for "store (trunc x)" patterns that appear early but are
+ // likely to be replaced by truncating store nodes during combining.
+ // TODO: If there is evidence that running this later would help, this
+ // limitation could be removed. Legality checks may need to be added
+ // for the created store and optional bswap/rotate.
+ if (LegalOperations)
+ return SDValue();
+
// Collect all the stores in the chain.
SDValue Chain;
SmallVector<StoreSDNode *, 8> Stores;
for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
// TODO: Allow unordered atomics when wider type is legal (see D66309)
- if (Store->getMemoryVT() != MVT::i8 ||
+ EVT MemVT = Store->getMemoryVT();
+ if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
!Store->isSimple() || Store->isIndexed())
return SDValue();
Stores.push_back(Store);
Chain = Store->getChain();
}
- // Handle the simple type only.
- unsigned Width = Stores.size();
- EVT VT = EVT::getIntegerVT(
- *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
- if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ // There is no reason to continue if we do not have at least a pair of stores.
+ if (Stores.size() < 2)
return SDValue();
- if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
+ // Handle simple types only.
+ LLVMContext &Context = *DAG.getContext();
+ unsigned NumStores = Stores.size();
+ unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
+ unsigned WideNumBits = NumStores * NarrowNumBits;
+ EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
+ if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
return SDValue();
- // Check if all the bytes of the combined value we are looking at are stored
- // to the same base address. Collect bytes offsets from Base address into
- // ByteOffsets.
- SDValue CombinedValue;
- SmallVector<int64_t, 8> ByteOffsets(Width, INT64_MAX);
+ // Check if all bytes of the source value that we are looking at are stored
+ // to the same base address. Collect offsets from Base address into OffsetMap.
+ SDValue SourceValue;
+ SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
int64_t FirstOffset = INT64_MAX;
StoreSDNode *FirstStore = nullptr;
Optional<BaseIndexOffset> Base;
for (auto Store : Stores) {
- // All the stores store different byte of the CombinedValue. A truncate is
- // required to get that byte value.
+ // All the stores store different parts of the CombinedValue. A truncate is
+ // required to get the partial value.
SDValue Trunc = Store->getValue();
if (Trunc.getOpcode() != ISD::TRUNCATE)
return SDValue();
- // A shift operation is required to get the right byte offset, except the
- // first byte.
+ // Other than the first/last part, a shift operation is required to get the
+ // offset.
int64_t Offset = 0;
- SDValue Value = Trunc.getOperand(0);
- if (Value.getOpcode() == ISD::SRL ||
- Value.getOpcode() == ISD::SRA) {
- auto *ShiftOffset = dyn_cast<ConstantSDNode>(Value.getOperand(1));
- // Trying to match the following pattern. The shift offset must be
- // a constant and a multiple of 8. It is the byte offset in "y".
+ SDValue WideVal = Trunc.getOperand(0);
+ if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
+ isa<ConstantSDNode>(WideVal.getOperand(1))) {
+ // The shift amount must be a constant multiple of the narrow type.
+ // It is translated to the offset address in the wide source value "y".
//
- // x = srl y, offset
+ // x = srl y, ShiftAmtC
// i8 z = trunc x
// store z, ...
- if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
+ uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
+ if (ShiftAmtC % NarrowNumBits != 0)
return SDValue();
- Offset = ShiftOffset->getSExtValue()/8;
- Value = Value.getOperand(0);
+ Offset = ShiftAmtC / NarrowNumBits;
+ WideVal = WideVal.getOperand(0);
}
- // Stores must share the same combined value with different offsets.
- if (!CombinedValue)
- CombinedValue = Value;
- else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
+ // Stores must share the same source value with different offsets.
+ // Truncate and extends should be stripped to get the single source value.
+ if (!SourceValue)
+ SourceValue = WideVal;
+ else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
return SDValue();
-
- // The trunc and all the extend operation should be stripped to get the
- // real value we are stored.
- else if (CombinedValue.getValueType() != VT) {
- if (Value.getValueType() == VT ||
- Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
- CombinedValue = Value;
- // Give up if the combined value type is smaller than the store size.
- if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
+ else if (SourceValue.getValueType() != WideVT) {
+ if (WideVal.getValueType() == WideVT ||
+ WideVal.getScalarValueSizeInBits() >
+ SourceValue.getScalarValueSizeInBits())
+ SourceValue = WideVal;
+ // Give up if the source value type is smaller than the store size.
+ if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
return SDValue();
}
- // Stores must share the same base address
+ // Stores must share the same base address.
BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
int64_t ByteOffsetFromBase = 0;
if (!Base)
@@ -6928,60 +7180,78 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
return SDValue();
- // Remember the first byte store
+ // Remember the first store.
if (ByteOffsetFromBase < FirstOffset) {
FirstStore = Store;
FirstOffset = ByteOffsetFromBase;
}
// Map the offset in the store and the offset in the combined value, and
// early return if it has been set before.
- if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
+ if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
return SDValue();
- ByteOffsets[Offset] = ByteOffsetFromBase;
+ OffsetMap[Offset] = ByteOffsetFromBase;
}
assert(FirstOffset != INT64_MAX && "First byte offset must be set");
assert(FirstStore && "First store must be set");
- // Check if the bytes of the combined value we are looking at match with
- // either big or little endian value store.
- Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
- if (!IsBigEndian.hasValue())
- return SDValue();
-
- // The node we are looking at matches with the pattern, check if we can
- // replace it with a single bswap if needed and store.
-
- // If the store needs byte swap check if the target supports it
- bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
-
- // Before legalize we can introduce illegal bswaps which will be later
- // converted to an explicit bswap sequence. This way we end up with a single
- // store and byte shuffling instead of several stores and byte shuffling.
- if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
- return SDValue();
-
// Check that a store of the wide type is both allowed and fast on the target
+ const DataLayout &Layout = DAG.getDataLayout();
bool Fast = false;
- bool Allowed =
- TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
- *FirstStore->getMemOperand(), &Fast);
+ bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
+ *FirstStore->getMemOperand(), &Fast);
if (!Allowed || !Fast)
return SDValue();
- if (VT != CombinedValue.getValueType()) {
- assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
- "Get unexpected store value to combine");
- CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
- CombinedValue);
+ // Check if the pieces of the value are going to the expected places in memory
+ // to merge the stores.
+ auto checkOffsets = [&](bool MatchLittleEndian) {
+ if (MatchLittleEndian) {
+ for (unsigned i = 0; i != NumStores; ++i)
+ if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
+ return false;
+ } else { // MatchBigEndian by reversing loop counter.
+ for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
+ if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
+ return false;
+ }
+ return true;
+ };
+
+ // Check if the offsets line up for the native data layout of this target.
+ bool NeedBswap = false;
+ bool NeedRotate = false;
+ if (!checkOffsets(Layout.isLittleEndian())) {
+ // Special-case: check if byte offsets line up for the opposite endian.
+ if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
+ NeedBswap = true;
+ else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
+ NeedRotate = true;
+ else
+ return SDValue();
+ }
+
+ SDLoc DL(N);
+ if (WideVT != SourceValue.getValueType()) {
+ assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
+ "Unexpected store value to merge");
+ SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
}
- if (NeedsBswap)
- CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
+ // Before legalize we can introduce illegal bswaps/rotates which will be later
+ // converted to an explicit bswap sequence. This way we end up with a single
+ // store and byte shuffling instead of several stores and byte shuffling.
+ if (NeedBswap) {
+ SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
+ } else if (NeedRotate) {
+ assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
+ SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
+ SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
+ }
SDValue NewStore =
- DAG.getStore(Chain, SDLoc(N), CombinedValue, FirstStore->getBasePtr(),
- FirstStore->getPointerInfo(), FirstStore->getAlignment());
+ DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
+ FirstStore->getPointerInfo(), FirstStore->getAlign());
// Rely on other DAG combine rules to remove the other individual stores.
DAG.ReplaceAllUsesWith(N, NewStore.getNode());
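A standalone sketch of the store patterns mergeTruncStores now merges: per-byte stores written in the opposite order to the host layout produce the same memory image as one wide store of a byte-swapped value, and the two-halves case corresponds to a rotate. Plain C++; a little-endian host and the GCC/Clang __builtin_bswap32 builtin are assumed:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint32_t val = 0x11223344u;

      // p[0] = val >> 24; ... p[3] = val >> 0;  -- bytes stored big-endian first.
      uint8_t bytes[4];
      for (int i = 0; i < 4; ++i)
        bytes[i] = (uint8_t)(val >> (24 - 8 * i));

      // Same memory image as one wide store of BSWAP(val) on a little-endian host.
      uint32_t swapped = __builtin_bswap32(val);
      uint8_t wide[4];
      std::memcpy(wide, &swapped, 4);
      assert(std::memcmp(bytes, wide, 4) == 0);

      // Two half-word stores with the halves swapped == one store of ROTR(val, 16).
      uint16_t halves[2] = {(uint16_t)(val >> 16), (uint16_t)val};
      uint32_t rotated = (val >> 16) | (val << 16);
      uint8_t wide2[4];
      std::memcpy(wide2, &rotated, 4);
      assert(std::memcmp(halves, wide2, 4) == 0);
      return 0;
    }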
@@ -7036,8 +7306,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
"can only analyze providers for individual bytes not bit");
unsigned LoadByteWidth = LoadBitWidth / 8;
return IsBigEndianTarget
- ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
- : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
+ ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
+ : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
};
Optional<BaseIndexOffset> Base;
@@ -7164,10 +7434,10 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
if (!Allowed || !Fast)
return SDValue();
- SDValue NewLoad = DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
- SDLoc(N), VT, Chain, FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(), MemVT,
- FirstLoad->getAlignment());
+ SDValue NewLoad =
+ DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
+ Chain, FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
// Transfer chain users from old loads to the new load.
for (LoadSDNode *L : Loads)
@@ -7337,9 +7607,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (N0.hasOneUse()) {
// FIXME Can we handle multiple uses? Could we token factor the chain
// results from the new/old setcc?
- SDValue SetCC = DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
- N0.getOperand(0),
- N0Opcode == ISD::STRICT_FSETCCS);
+ SDValue SetCC =
+ DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
+ N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
CombineTo(N, SetCC);
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
recursivelyDeleteUnusedNodes(N0.getNode());
@@ -7440,12 +7710,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
SDValue S0 = S.getOperand(0);
- if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
- unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
- if (C->getAPIntValue() == (OpSizeInBits - 1))
+ if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
return DAG.getNode(ISD::ABS, DL, VT, S0);
- }
}
}
@@ -7980,10 +8248,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
if (N0.getOpcode() == ISD::VSCALE)
if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
- auto DL = SDLoc(N);
- APInt C0 = N0.getConstantOperandAPInt(0);
- APInt C1 = NC1->getAPIntValue();
- return DAG.getVScale(DL, VT, C0 << C1);
+ const APInt &C0 = N0.getConstantOperandAPInt(0);
+ const APInt &C1 = NC1->getAPIntValue();
+ return DAG.getVScale(SDLoc(N), VT, C0 << C1);
}
return SDValue();
@@ -8032,12 +8299,6 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
if (NarrowVT != RightOp.getOperand(0).getValueType())
return SDValue();
- // Only transform into mulh if mulh for the narrow type is cheaper than
- // a multiply followed by a shift. This should also check if mulh is
- // legal for NarrowVT on the target.
- if (!TLI.isMulhCheaperThanMulShift(NarrowVT))
- return SDValue();
-
// Proceed with the transformation if the wide type is twice as large
// as the narrow type.
unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
@@ -8055,6 +8316,10 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
// we use mulhs. Otherwise, zero extends (zext) use mulhu.
unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
+ // Combine to mulh if mulh is legal/custom for the narrow type on the target.
+ if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
+ return SDValue();
+
SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
RightOp.getOperand(0));
return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
@@ -8556,8 +8821,8 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
RHS->getAddressSpace(), NewAlign,
RHS->getMemOperand()->getFlags(), &Fast) &&
Fast) {
- SDValue NewPtr =
- DAG.getMemBasePlusOffset(RHS->getBasePtr(), PtrOff, DL);
+ SDValue NewPtr = DAG.getMemBasePlusOffset(
+ RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
AddToWorklist(NewPtr.getNode());
SDValue Load = DAG.getLoad(
VT, DL, RHS->getChain(), NewPtr,
@@ -9154,16 +9419,75 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
+bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
+ if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
+ return false;
+
+ // For now we check only the LHS of the add.
+ SDValue LHS = Index.getOperand(0);
+ SDValue SplatVal = DAG.getSplatValue(LHS);
+ if (!SplatVal)
+ return false;
+
+ BasePtr = SplatVal;
+ Index = Index.getOperand(1);
+ return true;
+}
+
+// Fold sext/zext of index into index type.
+bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
+ bool Scaled, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (Index.getOpcode() == ISD::ZERO_EXTEND) {
+ SDValue Op = Index.getOperand(0);
+ MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
+ if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
+ Index = Op;
+ return true;
+ }
+ }
+
+ if (Index.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Op = Index.getOperand(0);
+ MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
+ if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
+ Index = Op;
+ return true;
+ }
+ }
+
+ return false;
+}
+
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
SDValue Mask = MSC->getMask();
SDValue Chain = MSC->getChain();
+ SDValue Index = MSC->getIndex();
+ SDValue Scale = MSC->getScale();
+ SDValue StoreVal = MSC->getValue();
+ SDValue BasePtr = MSC->getBasePtr();
SDLoc DL(N);
// Zap scatters with a zero mask.
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return Chain;
+ if (refineUniformBase(BasePtr, Index, DAG)) {
+ SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedScatter(
+ DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
+ MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
+ }
+
+ if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
+ SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedScatter(
+ DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
+ MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
+ }
+
return SDValue();
}
@@ -9177,6 +9501,14 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return Chain;
+ // If this is a masked store with an all ones mask, we can use an unmasked store.
+ // FIXME: Can we do this for indexed, compressing, or truncating stores?
+ if (ISD::isBuildVectorAllOnes(Mask.getNode()) &&
+ MST->isUnindexed() && !MST->isCompressingStore() &&
+ !MST->isTruncatingStore())
+ return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
+ MST->getBasePtr(), MST->getMemOperand());
+
// Try transforming N to an indexed store.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
@@ -9187,11 +9519,32 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
SDValue DAGCombiner::visitMGATHER(SDNode *N) {
MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
SDValue Mask = MGT->getMask();
+ SDValue Chain = MGT->getChain();
+ SDValue Index = MGT->getIndex();
+ SDValue Scale = MGT->getScale();
+ SDValue PassThru = MGT->getPassThru();
+ SDValue BasePtr = MGT->getBasePtr();
SDLoc DL(N);
// Zap gathers with a zero mask.
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
- return CombineTo(N, MGT->getPassThru(), MGT->getChain());
+ return CombineTo(N, PassThru, MGT->getChain());
+
+ if (refineUniformBase(BasePtr, Index, DAG)) {
+ SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
+ PassThru.getValueType(), DL, Ops,
+ MGT->getMemOperand(), MGT->getIndexType(),
+ MGT->getExtensionType());
+ }
+
+ if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
+ SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
+ PassThru.getValueType(), DL, Ops,
+ MGT->getMemOperand(), MGT->getIndexType(),
+ MGT->getExtensionType());
+ }
return SDValue();
}
@@ -9205,6 +9558,16 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return CombineTo(N, MLD->getPassThru(), MLD->getChain());
+ // If this is a masked load with an all ones mask, we can use an unmasked load.
+ // FIXME: Can we do this for indexed, expanding, or extending loads?
+ if (ISD::isBuildVectorAllOnes(Mask.getNode()) &&
+ MLD->isUnindexed() && !MLD->isExpandingLoad() &&
+ MLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
+ MLD->getBasePtr(), MLD->getMemOperand());
+ return CombineTo(N, NewLd, NewLd.getValue(1));
+ }
+
// Try transforming N to an indexed load.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
@@ -9364,6 +9727,113 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
}
}
+
+ // Match VSELECTs into add with unsigned saturation.
+ if (hasOperation(ISD::UADDSAT, VT)) {
+ // Check if one of the arms of the VSELECT is vector with all bits set.
+ // If it's on the left side invert the predicate to simplify logic below.
+ SDValue Other;
+ ISD::CondCode SatCC = CC;
+ if (ISD::isBuildVectorAllOnes(N1.getNode())) {
+ Other = N2;
+ SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
+ } else if (ISD::isBuildVectorAllOnes(N2.getNode())) {
+ Other = N1;
+ }
+
+ if (Other && Other.getOpcode() == ISD::ADD) {
+ SDValue CondLHS = LHS, CondRHS = RHS;
+ SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
+
+ // Canonicalize condition operands.
+ if (SatCC == ISD::SETUGE) {
+ std::swap(CondLHS, CondRHS);
+ SatCC = ISD::SETULE;
+ }
+
+ // We can test against either of the addition operands.
+ // x <= x+y ? x+y : ~0 --> uaddsat x, y
+ // x+y >= x ? x+y : ~0 --> uaddsat x, y
+ if (SatCC == ISD::SETULE && Other == CondRHS &&
+ (OpLHS == CondLHS || OpRHS == CondLHS))
+ return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
+
+ if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) &&
+ CondLHS == OpLHS) {
+ // If the RHS is a constant we have to reverse the const
+ // canonicalization.
+ // x >= ~C ? x+C : ~0 --> uaddsat x, C
+ auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
+ return Cond->getAPIntValue() == ~Op->getAPIntValue();
+ };
+ if (SatCC == ISD::SETULE &&
+ ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
+ return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
+ }
+ }
+ }
+
+ // Match VSELECTs into sub with unsigned saturation.
+ if (hasOperation(ISD::USUBSAT, VT)) {
+ // Check if one of the arms of the VSELECT is a zero vector. If it's on
+ // the left side invert the predicate to simplify logic below.
+ SDValue Other;
+ ISD::CondCode SatCC = CC;
+ if (ISD::isBuildVectorAllZeros(N1.getNode())) {
+ Other = N2;
+ SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
+ } else if (ISD::isBuildVectorAllZeros(N2.getNode())) {
+ Other = N1;
+ }
+
+ if (Other && Other.getNumOperands() == 2 && Other.getOperand(0) == LHS) {
+ SDValue CondRHS = RHS;
+ SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
+
+ // Look for a general sub with unsigned saturation first.
+ // x >= y ? x-y : 0 --> usubsat x, y
+ // x > y ? x-y : 0 --> usubsat x, y
+ if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
+ Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
+
+ if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
+ if (isa<BuildVectorSDNode>(CondRHS)) {
+ // If the RHS is a constant we have to reverse the const
+ // canonicalization.
+ // x > C-1 ? x+-C : 0 --> usubsat x, C
+ auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
+ return (!Op && !Cond) ||
+ (Op && Cond &&
+ Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
+ };
+ if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
+ ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
+ /*AllowUndefs*/ true)) {
+ OpRHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ OpRHS);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
+ }
+
+ // Another special case: If C was a sign bit, the sub has been
+ // canonicalized into a xor.
+ // FIXME: Would it be better to use computeKnownBits to determine
+ // whether it's safe to decanonicalize the xor?
+ // x s< 0 ? x^C : 0 --> usubsat x, C
+ if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
+ if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
+ OpRHSConst->getAPIntValue().isSignMask()) {
+ // Note that we have to rebuild the RHS constant here to ensure
+ // we don't rely on particular values of undef lanes.
+ OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
+ }
+ }
+ }
+ }
+ }
+ }
}
if (SimplifySelectOps(N, N1, N2))
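The select-to-saturating-arithmetic matches added above, restated as scalar C++ for i8 (the combine itself operates on vector nodes; the helper names here are illustrative only):

    #include <cassert>
    #include <cstdint>

    static uint8_t uaddsat(uint8_t x, uint8_t y) {
      // x <= x+y ? x+y : ~0 --> uaddsat x, y
      uint8_t sum = (uint8_t)(x + y);
      return x <= sum ? sum : 0xFF;
    }

    static uint8_t usubsat(uint8_t x, uint8_t y) {
      // x >= y ? x-y : 0 --> usubsat x, y
      return x >= y ? (uint8_t)(x - y) : 0;
    }

    int main() {
      assert(uaddsat(200, 100) == 255); // overflow clamps to all-ones
      assert(uaddsat(20, 30) == 50);
      assert(usubsat(10, 30) == 0);     // underflow clamps to zero
      assert(usubsat(30, 10) == 20);
      return 0;
    }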
@@ -9722,14 +10192,14 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
SDValue BasePtr = LN0->getBasePtr();
for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
const unsigned Offset = Idx * Stride;
- const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
+ const Align Align = commonAlignment(LN0->getAlign(), Offset);
SDValue SplitLoad = DAG.getExtLoad(
ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
- BasePtr = DAG.getMemBasePlusOffset(BasePtr, Stride, DL);
+ BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
Loads.push_back(SplitLoad.getValue(0));
Chains.push_back(SplitLoad.getValue(1));
@@ -10146,7 +10616,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- EVT N00VT = N0.getOperand(0).getValueType();
+ EVT N00VT = N00.getValueType();
// sext(setcc) -> sext_in_reg(vsetcc) for vectors.
// Only do this before legalize for now.
@@ -10240,6 +10710,29 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
}
+ // fold sext (not i1 X) -> add (zext i1 X), -1
+ // TODO: This could be extended to handle bool vectors.
+ if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
+ (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
+ TLI.isOperationLegal(ISD::ADD, VT)))) {
+ // If we can eliminate the 'not', the sext form should be better
+ if (SDValue NewXor = visitXOR(N0.getNode())) {
+ // Returning N0 is a form of in-visit replacement that may have
+ // invalidated N0.
+ if (NewXor.getNode() == N0.getNode()) {
+ // Return SDValue here as the xor should have already been replaced in
+ // this sext.
+ return SDValue();
+ } else {
+ // Return a new sext with the new xor.
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
+ }
+ }
+
+ SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
+ }
+
return SDValue();
}
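A quick check of the i1 identity behind the new fold above: sext(not x) equals zext(x) plus -1, with bool standing in for i1 (illustrative sketch only):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (bool x : {false, true}) {
        int32_t sext_of_not = !x ? -1 : 0;      // sext (not i1 x)
        int32_t zext_plus_m1 = (int32_t)x - 1;  // add (zext i1 x), -1
        assert(sext_of_not == zext_plus_m1);
      }
      return 0;
    }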
@@ -10507,13 +11000,16 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
N0.getValueType());
}
- // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
SDLoc DL(N);
+ EVT N0VT = N0.getValueType();
+ EVT N00VT = N0.getOperand(0).getValueType();
if (SDValue SCC = SimplifySelectCC(
- DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
- DAG.getConstant(0, DL, VT),
+ DL, N0.getOperand(0), N0.getOperand(1),
+ DAG.getBoolConstant(true, DL, N0VT, N00VT),
+ DAG.getBoolConstant(false, DL, N0VT, N00VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
- return SCC;
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
}
// (zext (shl (zext x), cst)) -> (shl (zext x), cst)
@@ -10602,22 +11098,26 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// fold (aext (load x)) -> (aext (truncate (extload x)))
// None of the supported targets knows how to perform load and any_ext
- // on vectors in one instruction. We only perform this transformation on
- // scalars.
- if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
- ISD::isUNINDEXEDLoad(N0.getNode()) &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
+ // on vectors in one instruction, so attempt to fold to zext instead.
+ if (VT.isVector()) {
+ // Try to simplify (zext (load x)).
+ if (SDValue foldedExt =
+ tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
+ ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
+ return foldedExt;
+ } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
bool DoXform = true;
- SmallVector<SDNode*, 4> SetCCs;
+ SmallVector<SDNode *, 4> SetCCs;
if (!N0.hasOneUse())
- DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
- TLI);
+ DoXform =
+ ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
- LN0->getChain(),
- LN0->getBasePtr(), N0.getValueType(),
- LN0->getMemOperand());
+ LN0->getChain(), LN0->getBasePtr(),
+ N0.getValueType(), LN0->getMemOperand());
ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
// If the load value is used only by N, replace it via CombineTo N.
bool NoReplaceTrunc = N0.hasOneUse();
@@ -10626,8 +11126,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
recursivelyDeleteUnusedNodes(LN0);
} else {
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
- N0.getValueType(), ExtLoad);
+ SDValue Trunc =
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
CombineTo(LN0, Trunc, ExtLoad.getValue(1));
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -10832,12 +11332,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
return SDValue();
uint64_t ShiftAmt = N01->getZExtValue();
- uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
+ uint64_t MemoryWidth = LN0->getMemoryVT().getScalarSizeInBits();
if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
else
ExtVT = EVT::getIntegerVT(*DAG.getContext(),
- VT.getSizeInBits() - ShiftAmt);
+ VT.getScalarSizeInBits() - ShiftAmt);
} else if (Opc == ISD::AND) {
// An AND with a constant mask is the same as a truncate + zero-extend.
auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
@@ -10864,12 +11364,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
SDValue SRL = N0;
if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
ShAmt = ConstShift->getZExtValue();
- unsigned EVTBits = ExtVT.getSizeInBits();
+ unsigned EVTBits = ExtVT.getScalarSizeInBits();
// Is the shift amount a multiple of size of VT?
if ((ShAmt & (EVTBits-1)) == 0) {
N0 = N0.getOperand(0);
// Is the load width a multiple of size of VT?
- if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
+ if ((N0.getScalarValueSizeInBits() & (EVTBits - 1)) != 0)
return SDValue();
}
@@ -10899,7 +11399,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
ShiftMask.countTrailingOnes());
// If the mask is smaller, recompute the type.
- if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
+ if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
ExtVT = MaskedVT;
}
@@ -10930,8 +11430,9 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
return SDValue();
auto AdjustBigEndianShift = [&](unsigned ShAmt) {
- unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
- unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
+ unsigned LVTStoreBits =
+ LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
+ unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
return LVTStoreBits - EVTStoreBits - ShAmt;
};
@@ -10941,13 +11442,13 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
ShAmt = AdjustBigEndianShift(ShAmt);
uint64_t PtrOff = ShAmt / 8;
- unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
+ Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
SDLoc DL(LN0);
// The original load itself didn't wrap, so an offset within it doesn't.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
- SDValue NewPtr =
- DAG.getMemBasePlusOffset(LN0->getBasePtr(), PtrOff, DL, Flags);
+ SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
+ TypeSize::Fixed(PtrOff), DL, Flags);
AddToWorklist(NewPtr.getNode());
SDValue Load;
@@ -10969,13 +11470,13 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
SDValue Result = Load;
if (ShLeftAmt != 0) {
EVT ShImmTy = getShiftAmountTy(Result.getValueType());
- if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
+ if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
ShImmTy = VT;
// If the shift amount is as large as the result size (but, presumably,
// no larger than the source) then the useful bits of the result are
// zero; we can't simply return the shortened shift, because the result
// of that operation is undefined.
- if (ShLeftAmt >= VT.getSizeInBits())
+ if (ShLeftAmt >= VT.getScalarSizeInBits())
Result = DAG.getConstant(0, DL, VT);
else
Result = DAG.getNode(ISD::SHL, DL, VT,
@@ -11125,6 +11626,41 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
+ // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
+ // ignore it if the masked load is already sign extended
+ if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
+ if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
+ Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
+ SDValue ExtMaskedLoad = DAG.getMaskedLoad(
+ VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
+ Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
+ Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
+ CombineTo(N, ExtMaskedLoad);
+ CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
+ if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
+ if (SDValue(GN0, 0).hasOneUse() &&
+ ExtVT == GN0->getMemoryVT() &&
+ TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
+ SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
+ GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
+
+ SDValue ExtLoad = DAG.getMaskedGather(
+ DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
+ GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
+
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ AddToWorklist(ExtLoad.getNode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
// Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
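Aside, not part of the patch: sign_extend_inreg keeps the low ExtVT bits of a value and sign-extends them in place, which is why an already-extending masked load or gather can absorb it. A minimal sketch of the scalar semantics, assuming a two's-complement target and i8-in-i32 for concreteness:

// Illustrative only: sext_inreg(x, i8) on an i32 == shl 24 then arithmetic shr 24.
#include <cassert>
#include <cstdint>
int main() {
  uint32_t x = 0x1234ABCDu;
  int32_t inreg = (int32_t)(int8_t)(x & 0xFFu); // keep low 8 bits, sign-extend
  int32_t shifted = (int32_t)(x << 24) >> 24;   // shl 24 + sra 24
  assert(inreg == -51 && inreg == shifted);
  return 0;
}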
@@ -11225,10 +11761,11 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
EVT ExTy = N0.getValueType();
EVT TrTy = N->getValueType(0);
- unsigned NumElem = VecTy.getVectorNumElements();
+ auto EltCnt = VecTy.getVectorElementCount();
unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
+ auto NewEltCnt = EltCnt * SizeRatio;
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
SDValue EltNo = N0->getOperand(1);
@@ -11342,8 +11879,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// after truncation.
if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- if (LN0->isSimple() &&
- LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
+ if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
VT, LN0->getChain(), LN0->getBasePtr(),
LN0->getMemoryVT(),
@@ -11372,9 +11908,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// Stop if more than one members are non-undef.
if (NumDefs > 1)
break;
+
VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
VT.getVectorElementType(),
- X.getValueType().getVectorNumElements()));
+ X.getValueType().getVectorElementCount()));
}
if (NumDefs == 0)
@@ -11415,8 +11952,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
// Simplify the operands using demanded-bits information.
- if (!VT.isVector() &&
- SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
@@ -11643,7 +12179,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
*LN0->getMemOperand())) {
SDValue Load =
DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
- LN0->getPointerInfo(), LN0->getAlignment(),
+ LN0->getPointerInfo(), LN0->getAlign(),
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
return Load;
@@ -11990,7 +12526,6 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (!HasFMAD && !HasFMA)
return SDValue();
- SDNodeFlags Flags = N->getFlags();
bool CanFuse = Options.UnsafeFPMath || isContractable(N);
bool CanReassociate =
Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
@@ -12023,15 +12558,15 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(0), N0.getOperand(1), N1, Flags);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
+ N0.getOperand(1), N1);
}
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.
if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N1.getOperand(0), N1.getOperand(1), N0, Flags);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
+ N1.getOperand(1), N0);
}
// fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
@@ -12054,8 +12589,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue B = FMA.getOperand(1);
SDValue C = FMA.getOperand(2).getOperand(0);
SDValue D = FMA.getOperand(2).getOperand(1);
- SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E, Flags);
- return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE, Flags);
+ SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
}
// Look through FP_EXTEND nodes to do more combining.
@@ -12067,10 +12602,9 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(1)), N1, Flags);
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
+ N1);
}
}
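Aside, not part of the patch: the FMA formation folds in this function trade two rounding steps (multiply, then add) for one. A small C++ sketch using libm's std::fma, with constants chosen only to make the difference visible:

// Illustrative only: x*y + z rounds twice, fma(x, y, z) rounds once.
#include <cmath>
#include <cstdio>
int main() {
  double x = 1.0 + 0x1p-30, y = 1.0 - 0x1p-30, z = -1.0;
  double prod = x * y;              // 1 - 2^-60 rounds to 1.0 here
  double separate = prod + z;       // so this is exactly 0.0
  double fused = std::fma(x, y, z); // single rounding keeps -2^-60
  std::printf("separate = %a, fused = %a\n", separate, fused);
  return 0;
}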
@@ -12082,10 +12616,9 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N10.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(1)), N0, Flags);
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
+ N0);
}
}
@@ -12093,14 +12626,13 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (Aggressive) {
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), z))
- auto FoldFAddFMAFPExtFMul = [&] (
- SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
- SDNodeFlags Flags) {
+ auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
+ SDValue Z) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
- Z, Flags), Flags);
+ Z));
};
if (N0.getOpcode() == PreferredFusedOpcode) {
SDValue N02 = N0.getOperand(2);
@@ -12111,7 +12643,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N020.getValueType())) {
return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
N020.getOperand(0), N020.getOperand(1),
- N1, Flags);
+ N1);
}
}
}
@@ -12121,16 +12653,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
- auto FoldFAddFPExtFMAFMul = [&] (
- SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
- SDNodeFlags Flags) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
- Z, Flags), Flags);
+ auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
+ SDValue Z) {
+ return DAG.getNode(
+ PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
};
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
@@ -12141,7 +12671,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N00.getValueType())) {
return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
N002.getOperand(0), N002.getOperand(1),
- N1, Flags);
+ N1);
}
}
}
@@ -12157,7 +12687,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N120.getValueType())) {
return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
N120.getOperand(0), N120.getOperand(1),
- N0, Flags);
+ N0);
}
}
}
@@ -12176,7 +12706,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N10.getValueType())) {
return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
N102.getOperand(0), N102.getOperand(1),
- N0, Flags);
+ N0);
}
}
}
@@ -12234,8 +12764,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
- XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z),
- Flags);
+ XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
}
return SDValue();
};
@@ -12246,7 +12775,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
- YZ.getOperand(1), X, Flags);
+ YZ.getOperand(1), X);
}
return SDValue();
};
@@ -12277,7 +12806,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N01 = N0.getOperand(0).getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
- DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
}
// Look through FP_EXTEND nodes to do more combining.
@@ -12290,11 +12819,9 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
}
}
@@ -12306,13 +12833,11 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(N10) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N10.getValueType())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(1)),
- N0, Flags);
+ return DAG.getNode(
+ PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
}
}
@@ -12329,13 +12854,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(N000) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
- return DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(1)),
- N1, Flags));
+ return DAG.getNode(
+ ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
+ N1));
}
}
}
@@ -12353,13 +12877,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(N000) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N000.getValueType())) {
- return DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(1)),
- N1, Flags));
+ return DAG.getNode(
+ ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
+ N1));
}
}
}
@@ -12371,13 +12894,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
N0.getOperand(2)->hasOneUse()) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(0), N0.getOperand(1),
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
+ N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
- DAG.getNode(ISD::FNEG, SL, VT,
- N1), Flags), Flags);
+ DAG.getNode(ISD::FNEG, SL, VT, N1)));
}
// fold (fsub x, (fma y, z, (fmul u, v)))
@@ -12387,13 +12909,11 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N1->hasOneUse() && NoSignedZero) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- N1.getOperand(0)),
- N1.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, N20),
- N21, N0, Flags), Flags);
+ return DAG.getNode(
+ PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
}
@@ -12407,15 +12927,13 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(N020) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N020.getValueType())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N020.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N020.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT,
- N1), Flags), Flags);
+ return DAG.getNode(
+ PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(
+ PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT, N1)));
}
}
}
@@ -12433,18 +12951,15 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(1)),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N002.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N002.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT,
- N1), Flags), Flags);
+ return DAG.getNode(
+ PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
+ DAG.getNode(
+ PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT, N1)));
}
}
}
@@ -12460,16 +12975,13 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N120.getValueType())) {
SDValue N1200 = N120.getOperand(0);
SDValue N1201 = N120.getOperand(1);
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
- N1.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL,
- VT, N1200)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N1201),
- N0, Flags), Flags);
+ return DAG.getNode(
+ PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
}
}
@@ -12490,18 +13002,15 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
CvtSrc.getValueType())) {
SDValue N1020 = N102.getOperand(0);
SDValue N1021 = N102.getOperand(1);
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N100)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL,
- VT, N1020)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N1021),
- N0, Flags), Flags);
+ return DAG.getNode(
+ PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
}
}
}
@@ -12517,7 +13026,6 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc SL(N);
- const SDNodeFlags Flags = N->getFlags();
assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
@@ -12549,56 +13057,56 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
// fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
// fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
- auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
+ auto FuseFADD = [&](SDValue X, SDValue Y) {
if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
if (C->isExactlyValue(+1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- Y, Flags);
+ Y);
if (C->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
}
}
return SDValue();
};
- if (SDValue FMA = FuseFADD(N0, N1, Flags))
+ if (SDValue FMA = FuseFADD(N0, N1))
return FMA;
- if (SDValue FMA = FuseFADD(N1, N0, Flags))
+ if (SDValue FMA = FuseFADD(N1, N0))
return FMA;
// fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
// fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
// fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
// fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
- auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
+ auto FuseFSUB = [&](SDValue X, SDValue Y) {
if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
if (C0->isExactlyValue(+1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
- Y, Flags);
+ Y);
if (C0->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
}
if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
if (C1->isExactlyValue(+1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
if (C1->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- Y, Flags);
+ Y);
}
}
return SDValue();
};
- if (SDValue FMA = FuseFSUB(N0, N1, Flags))
+ if (SDValue FMA = FuseFSUB(N0, N1))
return FMA;
- if (SDValue FMA = FuseFSUB(N1, N0, Flags))
+ if (SDValue FMA = FuseFSUB(N1, N0))
return FMA;
return SDValue();
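Aside, not part of the patch: the FuseFADD/FuseFSUB folds above are plain distributivity, e.g. (x + 1.0) * y == x*y + y == fma(x, y, y). A minimal check with small exactly-representable constants (illustrative only; it assumes contraction is allowed, which is exactly what the combiner verifies):

// Illustrative only: (x + 1.0) * y -> fma(x, y, y) and (x - 1.0) * y -> fma(x, y, -y).
#include <cassert>
#include <cmath>
int main() {
  double x = 3.0, y = 0.5;
  assert((x + 1.0) * y == std::fma(x, y, y));
  assert((x - 1.0) * y == std::fma(x, y, -y));
  return 0;
}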
@@ -12607,12 +13115,13 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
- bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
+ bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
+ bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
- const SDNodeFlags Flags = N->getFlags();
+ SDNodeFlags Flags = N->getFlags();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
@@ -12624,11 +13133,11 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold (fadd c1, c2) -> c1 + c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
- return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
+ return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
// N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
@@ -12643,13 +13152,13 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
N1, DAG, LegalOperations, ForCodeSize))
- return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1, Flags);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
// fold (fadd (fneg A), B) -> (fsub B, A)
if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
N0, DAG, LegalOperations, ForCodeSize))
- return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0, Flags);
+ return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
auto isFMulNegTwo = [](SDValue FMul) {
if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
@@ -12661,14 +13170,14 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
if (isFMulNegTwo(N0)) {
SDValue B = N0.getOperand(0);
- SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
- return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
+ SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
+ return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
}
// fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
if (isFMulNegTwo(N1)) {
SDValue B = N1.getOperand(0);
- SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
- return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
+ SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
}
// No FP constant should be created after legalization as Instruction
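Aside, not part of the patch: the "fmul B, -2.0" rewrites above are exact because scaling by a power of two never rounds, so A + B*(-2.0) and A - (B + B) produce identical results. A one-line check with illustrative constants:

// Illustrative only: fadd(A, fmul(B, -2.0)) == fsub(A, fadd(B, B)).
#include <cassert>
int main() {
  double a = 0.3, b = 1.7;
  assert(a + b * -2.0 == a - (b + b));
  return 0;
}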
@@ -12694,9 +13203,9 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
AllowNewConst) {
// fadd (fadd x, c1), c2 -> fadd x, c1 + c2
if (N1CFP && N0.getOpcode() == ISD::FADD &&
- isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
- SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
- return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
+ DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
+ SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
+ return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
}
// We can fold chains of FADD's of the same value into multiplications.
@@ -12704,14 +13213,14 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// of rounding steps.
if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
if (N0.getOpcode() == ISD::FMUL) {
- bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
- bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
+ bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
+ bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
// (fadd (fmul x, c), x) -> (fmul x, c+1)
if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
- DAG.getConstantFP(1.0, DL, VT), Flags);
- return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
+ DAG.getConstantFP(1.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
}
// (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
@@ -12719,20 +13228,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
- DAG.getConstantFP(2.0, DL, VT), Flags);
- return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
+ DAG.getConstantFP(2.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
}
}
if (N1.getOpcode() == ISD::FMUL) {
- bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
- bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
+ bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
+ bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
// (fadd x, (fmul x, c)) -> (fmul x, c+1)
if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
- DAG.getConstantFP(1.0, DL, VT), Flags);
- return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
+ DAG.getConstantFP(1.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
}
// (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
@@ -12740,28 +13249,28 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N0.getOperand(0)) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
- DAG.getConstantFP(2.0, DL, VT), Flags);
- return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
+ DAG.getConstantFP(2.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
}
}
if (N0.getOpcode() == ISD::FADD) {
- bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
+ bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul x, 3.0)
if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
(N0.getOperand(0) == N1)) {
- return DAG.getNode(ISD::FMUL, DL, VT,
- N1, DAG.getConstantFP(3.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N1,
+ DAG.getConstantFP(3.0, DL, VT));
}
}
if (N1.getOpcode() == ISD::FADD) {
- bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
+ bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
N1.getOperand(0) == N0) {
- return DAG.getNode(ISD::FMUL, DL, VT,
- N0, DAG.getConstantFP(3.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getConstantFP(3.0, DL, VT));
}
}
@@ -12771,7 +13280,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
- DAG.getConstantFP(4.0, DL, VT), Flags);
+ DAG.getConstantFP(4.0, DL, VT));
}
}
} // enable-unsafe-fp-math
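Aside, not part of the patch: the unsafe-math block above turns chains of fadds of one value into a multiply. For these particular small multiples the two forms happen to agree exactly, because x + x never rounds; a short check with an illustrative constant:

// Illustrative only: (x + x) + x == x * 3.0 and (x + x) + (x + x) == x * 4.0.
#include <cassert>
int main() {
  double x = 1.1;
  assert((x + x) + x == x * 3.0);
  assert((x + x) + (x + x) == x * 4.0);
  return 0;
}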
@@ -12784,6 +13293,33 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue N0 = N->getOperand(1);
+ SDValue N1 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ EVT ChainVT = N->getValueType(1);
+ SDLoc DL(N);
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+
+ // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
+ if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
+ N1, DAG, LegalOperations, ForCodeSize)) {
+ return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
+ {Chain, N0, NegN1});
+ }
+
+ // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
+ if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
+ N0, DAG, LegalOperations, ForCodeSize)) {
+ return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
+ {Chain, N1, NegN0});
+ }
+ return SDValue();
+}
+
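Aside, not part of the patch: the new strict_fadd combine only performs the sign-flip rewrites a + (-b) -> a - b and (-a) + b -> b - a, which are value-preserving in IEEE arithmetic; keeping the strict node preserves exception behavior. Trivial check, constants illustrative:

// Illustrative only: a + (-b) == a - b and (-a) + b == b - a.
#include <cassert>
int main() {
  double a = 1.5, b = 0.25;
  assert(a + (-b) == a - b);
  assert((-a) + b == b - a);
  return 0;
}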
SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -12793,6 +13329,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags Flags = N->getFlags();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
@@ -12804,7 +13341,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// fold (fsub c1, c2) -> c1-c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, N1);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -12824,18 +13361,21 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
// (fsub -0.0, N1) -> -N1
- // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
- // FSUB does not specify the sign bit of a NaN. Also note that for
- // the same reason, the inverse transform is not safe, unless fast math
- // flags are in play.
if (N0CFP && N0CFP->isZero()) {
if (N0CFP->isNegative() ||
(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
- if (SDValue NegN1 =
- TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
- return NegN1;
- if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
+ // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
+ // flushed to zero, unless all users treat denorms as zero (DAZ).
+ // FIXME: This transform will change the sign of a NaN and the behavior
+ // of a signaling NaN. It is only valid when a NoNaN flag is present.
+ DenormalMode DenormMode = DAG.getDenormalMode(VT);
+ if (DenormMode == DenormalMode::getIEEE()) {
+ if (SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
+ return NegN1;
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, DL, VT, N1);
+ }
}
}
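Aside, not part of the patch: the (fsub -0.0, N1) -> (fneg N1) fold needs the -0.0 (or nsz) because subtraction from +0.0 and negation disagree on the sign of a zero result, and per the patch comment the new denormal-mode check is needed because the subtraction may flush denormals while a plain sign flip does not. The sign-of-zero part in a short sketch, assuming default FP semantics:

// Illustrative only: +0.0 - (+0.0) is +0.0, but fneg(+0.0) is -0.0;
// starting from -0.0 the subtraction and the negation agree.
#include <cassert>
#include <cmath>
int main() {
  double x = 0.0;
  assert(!std::signbit(0.0 - x));
  assert(std::signbit(-x));
  assert(std::signbit(-0.0 - x));
  return 0;
}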
@@ -12844,16 +13384,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
N1.getOpcode() == ISD::FADD) {
// X - (X + Y) -> -Y
if (N0 == N1->getOperand(0))
- return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
+ return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
// X - (Y + X) -> -Y
if (N0 == N1->getOperand(1))
- return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
+ return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (SDValue NegN1 =
TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
- return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1, Flags);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
// FSUB -> FMA combines:
if (SDValue Fused = visitFSUBForFMACombine(N)) {
@@ -12873,6 +13413,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags Flags = N->getFlags();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
@@ -12886,35 +13427,28 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// fold (fmul c1, c2) -> c1*c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
// canonicalize constant to RHS
- if (isConstantFPBuildVectorOrConstantFP(N0) &&
- !isConstantFPBuildVectorOrConstantFP(N1))
- return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(N1))
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) ||
- (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
- // fold (fmul A, 0) -> 0
- if (N1CFP && N1CFP->isZero())
- return N1;
- }
-
if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
// fmul (fmul X, C1), C2 -> fmul X, C1 * C2
- if (isConstantFPBuildVectorOrConstantFP(N1) &&
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
N0.getOpcode() == ISD::FMUL) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
// Avoid an infinite loop by making sure that N00 is not a constant
// (the inner multiply has not been constant folded yet).
- if (isConstantFPBuildVectorOrConstantFP(N01) &&
- !isConstantFPBuildVectorOrConstantFP(N00)) {
- SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
- return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
+ return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
}
}
@@ -12923,14 +13457,14 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
N0.getOperand(0) == N0.getOperand(1)) {
const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
- SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
- return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
}
}
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
- return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
// fold (fmul X, -1.0) -> (fneg X)
if (N1CFP && N1CFP->isExactlyValue(-1.0))
@@ -12949,7 +13483,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (NegN0 && NegN1 &&
(CostN0 == TargetLowering::NegatibleCost::Cheaper ||
CostN1 == TargetLowering::NegatibleCost::Cheaper))
- return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
// fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
// fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
@@ -13015,10 +13549,11 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
-
// FMA nodes have flags that propagate to the created nodes.
- const SDNodeFlags Flags = N->getFlags();
- bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+
+ bool UnsafeFPMath =
+ Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
// Constant fold FMA.
if (isa<ConstantFPSDNode>(N0) &&
@@ -13039,7 +13574,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (NegN0 && NegN1 &&
(CostN0 == TargetLowering::NegatibleCost::Cheaper ||
CostN1 == TargetLowering::NegatibleCost::Cheaper))
- return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
+ return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
if (UnsafeFPMath) {
if (N0CFP && N0CFP->isZero())
@@ -13047,51 +13582,45 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (N1CFP && N1CFP->isZero())
return N2;
}
- // TODO: The FMA node should have flags that propagate to these nodes.
+
if (N0CFP && N0CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
if (N1CFP && N1CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
// Canonicalize (fma c, x, y) -> (fma x, c, y)
- if (isConstantFPBuildVectorOrConstantFP(N0) &&
- !isConstantFPBuildVectorOrConstantFP(N1))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
if (UnsafeFPMath) {
// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
- isConstantFPBuildVectorOrConstantFP(N1) &&
- isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
+ DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
+ DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
return DAG.getNode(ISD::FMUL, DL, VT, N0,
- DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
- Flags), Flags);
+ DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
}
// (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
if (N0.getOpcode() == ISD::FMUL &&
- isConstantFPBuildVectorOrConstantFP(N1) &&
- isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
- return DAG.getNode(ISD::FMA, DL, VT,
- N0.getOperand(0),
- DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
- Flags),
+ DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
+ DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
+ return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
+ DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
N2);
}
}
- // (fma x, 1, y) -> (fadd x, y)
// (fma x, -1, y) -> (fadd (fneg x), y)
if (N1CFP) {
if (N1CFP->isExactlyValue(1.0))
- // TODO: The FMA node should have flags that propagate to this node.
return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
if (N1CFP->isExactlyValue(-1.0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
AddToWorklist(RHSNeg.getNode());
- // TODO: The FMA node should have flags that propagate to this node.
return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
}
@@ -13101,25 +13630,23 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
(N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
ForCodeSize)))) {
return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
- DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
+ DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
}
}
if (UnsafeFPMath) {
// (fma x, c, x) -> (fmul x, (c+1))
if (N1CFP && N0 == N2) {
- return DAG.getNode(ISD::FMUL, DL, VT, N0,
- DAG.getNode(ISD::FADD, DL, VT, N1,
- DAG.getConstantFP(1.0, DL, VT), Flags),
- Flags);
+ return DAG.getNode(
+ ISD::FMUL, DL, VT, N0,
+ DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
}
// (fma x, c, (fneg x)) -> (fmul x, (c-1))
if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
- return DAG.getNode(ISD::FMUL, DL, VT, N0,
- DAG.getNode(ISD::FADD, DL, VT, N1,
- DAG.getConstantFP(-1.0, DL, VT), Flags),
- Flags);
+ return DAG.getNode(
+ ISD::FMUL, DL, VT, N0,
+ DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
}
}
@@ -13128,7 +13655,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (!TLI.isFNegFree(VT))
if (SDValue Neg = TLI.getCheaperNegatedExpression(
SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
- return DAG.getNode(ISD::FNEG, DL, VT, Neg, Flags);
+ return DAG.getNode(ISD::FNEG, DL, VT, Neg);
return SDValue();
}
@@ -13149,14 +13676,13 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
return SDValue();
// Skip if current node is a reciprocal/fneg-reciprocal.
- SDValue N0 = N->getOperand(0);
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
return SDValue();
// Exit early if the target does not want this transform or if there can't
// possibly be enough uses of the divisor to make the transform worthwhile.
- SDValue N1 = N->getOperand(1);
unsigned MinUses = TLI.combineRepeatedFPDivisors();
// For splat vectors, scale the number of uses by the splat factor. If we can
@@ -13174,6 +13700,13 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
SetVector<SDNode *> Users;
for (auto *U : N1->uses()) {
if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
+ // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
+ if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
+ U->getOperand(0) == U->getOperand(1).getOperand(0) &&
+ U->getFlags().hasAllowReassociation() &&
+ U->getFlags().hasNoSignedZeros())
+ continue;
+
// This division is eligible for optimization only if global unsafe math
// is enabled or if this division allows reciprocal formation.
if (UnsafeMath || U->getFlags().hasAllowReciprocal())
@@ -13215,6 +13748,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
SDNodeFlags Flags = N->getFlags();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
@@ -13226,7 +13760,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// fold (fdiv c1, c2) -> c1/c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
+ return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -13251,29 +13785,29 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
TLI.isOperationLegal(ISD::ConstantFP, VT) ||
TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
return DAG.getNode(ISD::FMUL, DL, VT, N0,
- DAG.getConstantFP(Recip, DL, VT), Flags);
+ DAG.getConstantFP(Recip, DL, VT));
}
// If this FDIV is part of a reciprocal square root, it may be folded
// into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) {
if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
} else if (N1.getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
- Flags)) {
+ if (SDValue RV =
+ buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
}
} else if (N1.getOpcode() == ISD::FP_ROUND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
- Flags)) {
+ if (SDValue RV =
+ buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
}
} else if (N1.getOpcode() == ISD::FMUL) {
// Look through an FMUL. Even though this won't remove the FDIV directly,
@@ -13288,29 +13822,34 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
}
if (Sqrt.getNode()) {
// If the other multiply operand is known positive, pull it into the
- // sqrt. That will eliminate the division if we convert to an estimate:
- // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
- // TODO: Also fold the case where A == Z (fabs is missing).
+ // sqrt. That will eliminate the division if we convert to an estimate.
if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
- N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse() &&
- Y.getOpcode() == ISD::FABS && Y.hasOneUse()) {
- SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, Y.getOperand(0),
- Y.getOperand(0), Flags);
- SDValue AAZ =
- DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0), Flags);
- if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
- return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt, Flags);
-
- // Estimate creation failed. Clean up speculatively created nodes.
- recursivelyDeleteUnusedNodes(AAZ.getNode());
+ N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
+ SDValue A;
+ if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
+ A = Y.getOperand(0);
+ else if (Y == Sqrt.getOperand(0))
+ A = Y;
+ if (A) {
+ // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
+ // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
+ SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
+ SDValue AAZ =
+ DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
+ if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
+
+ // Estimate creation failed. Clean up speculatively created nodes.
+ recursivelyDeleteUnusedNodes(AAZ.getNode());
+ }
}
// We found a FSQRT, so try to make this fold:
// X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
- SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y, Flags);
+ SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
AddToWorklist(Div.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, Div, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
}
}
}
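Aside, not part of the patch: the block above relies on |a| * sqrt(z) == sqrt(a*a*z) for z >= 0, which is what lets the fabs (or the repeated operand) be pulled under the square root before forming the rsqrt estimate. A quick numeric check with illustrative constants and a loose tolerance:

// Illustrative only: fabs(a) * sqrt(z) == sqrt(a*a*z) up to rounding.
#include <cassert>
#include <cmath>
int main() {
  double a = -3.0, z = 2.0;
  assert(std::fabs(std::fabs(a) * std::sqrt(z) - std::sqrt(a * a * z)) < 1e-12);
  return 0;
}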
@@ -13321,6 +13860,12 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
return RV;
}
+ // Fold X/Sqrt(X) -> Sqrt(X)
+ if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
+ (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
+ if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
+ return N1;
+
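Aside, not part of the patch: the new X/sqrt(X) fold is the identity x / sqrt(x) == sqrt(x) for positive x, since x == sqrt(x) * sqrt(x); the reassoc and nsz requirements come from the usual fast-math caveats around -0.0 and re-association. A quick check with an illustrative constant and tolerance:

// Illustrative only: x / sqrt(x) == sqrt(x) up to rounding for x > 0.
#include <cassert>
#include <cmath>
int main() {
  double x = 7.25;
  assert(std::fabs(x / std::sqrt(x) - std::sqrt(x)) < 1e-12);
  return 0;
}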
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
TargetLowering::NegatibleCost CostN0 =
TargetLowering::NegatibleCost::Expensive;
@@ -13333,7 +13878,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (NegN0 && NegN1 &&
(CostN0 == TargetLowering::NegatibleCost::Cheaper ||
CostN1 == TargetLowering::NegatibleCost::Cheaper))
- return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1, Flags);
+ return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
return SDValue();
}
@@ -13345,13 +13890,14 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
SDNodeFlags Flags = N->getFlags();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
// fold (frem c1, c2) -> fmod(c1,c2)
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
+ return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -13365,7 +13911,7 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
// Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
// sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
- if ((!Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) ||
+ if (!Flags.hasApproximateFuncs() ||
(!Options.NoInfsFPMath && !Flags.hasNoInfs()))
return SDValue();
@@ -13374,6 +13920,10 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
return SDValue();
// FSQRT nodes have flags that propagate to the created nodes.
+ // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
+ // transform the fdiv, we may produce a sub-optimal estimate sequence
+ // because the reciprocal calculation may not have to filter out a
+ // 0.0 input.
return buildSqrtEstimate(N0, Flags);
}
@@ -13397,8 +13947,8 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
- bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
+ bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
+ bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
if (N0CFP && N1CFP) // Constant fold
@@ -13445,6 +13995,7 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
if (!ExponentC)
return SDValue();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
// Try to convert x ** (1/3) into cube root.
// TODO: Handle the various flavors of long double.
@@ -13471,7 +14022,7 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
return SDValue();
- return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
+ return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
}
// Try to convert x ** (1/4) and x ** (3/4) into square roots.
@@ -13506,12 +14057,12 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
// pow(X, 0.25) --> sqrt(sqrt(X))
SDLoc DL(N);
- SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
- SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
+ SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
+ SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
if (ExponentIs025)
return SqrtSqrt;
// pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
- return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
}
return SDValue();
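Aside, not part of the patch: the pow folds above are the exponent identities x^0.25 == sqrt(sqrt(x)) and x^0.75 == sqrt(x) * sqrt(sqrt(x)) for non-negative x. A quick numeric check, constants and tolerance illustrative:

// Illustrative only: pow(x, 0.25) and pow(x, 0.75) via square roots.
#include <cassert>
#include <cmath>
int main() {
  double x = 19.0;
  assert(std::fabs(std::pow(x, 0.25) - std::sqrt(std::sqrt(x))) < 1e-12);
  assert(std::fabs(std::pow(x, 0.75) - std::sqrt(x) * std::sqrt(std::sqrt(x))) < 1e-12);
  return 0;
}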
@@ -13694,7 +14245,7 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
return DAG.getUNDEF(VT);
// fold (fp_to_sint c1fp) -> c1
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
return FoldIntToFPToInt(N, DAG);
@@ -13709,7 +14260,7 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
return DAG.getUNDEF(VT);
// fold (fp_to_uint c1fp) -> c1
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
return FoldIntToFPToInt(N, DAG);
@@ -13781,7 +14332,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
return SDValue();
// fold (fp_extend c1fp) -> c1fp
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
// fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
@@ -13829,7 +14380,7 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (fceil c1) -> fceil(c1)
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
return SDValue();
@@ -13840,7 +14391,7 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (ftrunc c1) -> ftrunc(c1)
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
// fold ftrunc (known rounded int x) -> x
@@ -13864,19 +14415,19 @@ SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (ffloor c1) -> ffloor(c1)
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
return SDValue();
}
-// FIXME: FNEG and FABS have a lot in common; refactor.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
// Constant fold FNEG.
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
if (SDValue NegN0 =
@@ -13891,51 +14442,12 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
(DAG.getTarget().Options.NoSignedZerosFPMath ||
N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
- N0.getOperand(0), N->getFlags());
- }
-
- // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
- // constant pool values.
- if (!TLI.isFNegFree(VT) &&
- N0.getOpcode() == ISD::BITCAST &&
- N0.getNode()->hasOneUse()) {
- SDValue Int = N0.getOperand(0);
- EVT IntVT = Int.getValueType();
- if (IntVT.isInteger() && !IntVT.isVector()) {
- APInt SignMask;
- if (N0.getValueType().isVector()) {
- // For a vector, get a mask such as 0x80... per scalar element
- // and splat it.
- SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
- SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
- } else {
- // For a scalar, just generate 0x80...
- SignMask = APInt::getSignMask(IntVT.getSizeInBits());
- }
- SDLoc DL0(N0);
- Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
- DAG.getConstant(SignMask, DL0, IntVT));
- AddToWorklist(Int.getNode());
- return DAG.getBitcast(VT, Int);
- }
- }
-
- // (fneg (fmul c, x)) -> (fmul -c, x)
- if (N0.getOpcode() == ISD::FMUL &&
- (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
- ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
- if (CFP1) {
- APFloat CVal = CFP1->getValueAPF();
- CVal.changeSign();
- if (LegalDAG && (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
- TLI.isOperationLegal(ISD::ConstantFP, VT)))
- return DAG.getNode(
- ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
- N0->getFlags());
- }
+ N0.getOperand(0));
}
+ if (SDValue Cast = foldSignChangeInBitcast(N))
+ return Cast;
+
return SDValue();
}
@@ -13946,6 +14458,11 @@ static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
EVT VT = N->getValueType(0);
const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
+ const SDNodeFlags Flags = N->getFlags();
+ unsigned Opc = N->getOpcode();
+ bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
+ bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
@@ -13954,10 +14471,39 @@ static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
}
// Canonicalize to constant on RHS.
- if (isConstantFPBuildVectorOrConstantFP(N0) &&
- !isConstantFPBuildVectorOrConstantFP(N1))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
+ if (N1CFP) {
+ const APFloat &AF = N1CFP->getValueAPF();
+
+ // minnum(X, nan) -> X
+ // maxnum(X, nan) -> X
+ // minimum(X, nan) -> nan
+ // maximum(X, nan) -> nan
+ if (AF.isNaN())
+ return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
+
+ // In the following folds, inf can be replaced with the largest finite
+ // float, if the ninf flag is set.
+ if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
+ // minnum(X, -inf) -> -inf
+ // maxnum(X, +inf) -> +inf
+ // minimum(X, -inf) -> -inf if nnan
+ // maximum(X, +inf) -> +inf if nnan
+ if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
+ return N->getOperand(1);
+
+ // minnum(X, +inf) -> X if nnan
+ // maxnum(X, -inf) -> X if nnan
+ // minimum(X, +inf) -> X
+ // maximum(X, -inf) -> X
+ if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
+ return N->getOperand(0);
+ }
+ }
+
return SDValue();
}
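The constant-RHS folds above compress into a small scalar decision table. A minimal host-side model, assuming plain C++ doubles (illustrative only, not DAG code; propagatesNaN stands for the FMINIMUM/FMAXIMUM flavours and isMin for the "min" flavours):

#include <cmath>

static double foldMinMaxConstRHS(double X, double C, bool propagatesNaN,
                                 bool isMin, bool hasNoNaNs) {
  if (std::isnan(C))
    return propagatesNaN ? C : X; // minnum/maxnum(X, nan) -> X; minimum/maximum -> nan
  if (std::isinf(C)) {
    bool negC = std::signbit(C);
    if (isMin == negC && (!propagatesNaN || hasNoNaNs))
      return C;                   // e.g. minnum(X, -inf) -> -inf
    if (isMin != negC && (propagatesNaN || hasNoNaNs))
      return X;                   // e.g. maximum(X, -inf) -> X
  }
  return isMin ? std::fmin(X, C) : std::fmax(X, C); // no fold applies
}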
@@ -13982,7 +14528,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (fabs c1) -> fabs(c1)
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
// fold (fabs (fabs x)) -> (fabs x)
@@ -13994,28 +14540,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
- // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
- if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
- SDValue Int = N0.getOperand(0);
- EVT IntVT = Int.getValueType();
- if (IntVT.isInteger() && !IntVT.isVector()) {
- APInt SignMask;
- if (N0.getValueType().isVector()) {
- // For a vector, get a mask such as 0x7f... per scalar element
- // and splat it.
- SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
- SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
- } else {
- // For a scalar, just generate 0x7f...
- SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
- }
- SDLoc DL(N0);
- Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
- DAG.getConstant(SignMask, DL, IntVT));
- AddToWorklist(Int.getNode());
- return DAG.getBitcast(N->getValueType(0), Int);
- }
- }
+ if (SDValue Cast = foldSignChangeInBitcast(N))
+ return Cast;
return SDValue();
}
@@ -14025,6 +14551,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
+ // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
+ // nondeterministic jumps).
+ if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
+ return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
+ N1->getOperand(0), N2);
+ }
+
// If N is a constant we could fold this into a fallthrough or unconditional
// branch. However that doesn't happen very often in normal code, because
// Instcombine/SimplifyCFG should have handled the available opportunities.
@@ -14178,63 +14711,6 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
return SDValue();
}
-/// Return true if 'Use' is a load or a store that uses N as its base pointer
-/// and that N may be folded in the load / store addressing mode.
-static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
- SelectionDAG &DAG,
- const TargetLowering &TLI) {
- EVT VT;
- unsigned AS;
-
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
- if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
- return false;
- VT = LD->getMemoryVT();
- AS = LD->getAddressSpace();
- } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
- if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
- return false;
- VT = ST->getMemoryVT();
- AS = ST->getAddressSpace();
- } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
- if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
- return false;
- VT = LD->getMemoryVT();
- AS = LD->getAddressSpace();
- } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
- if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
- return false;
- VT = ST->getMemoryVT();
- AS = ST->getAddressSpace();
- } else
- return false;
-
- TargetLowering::AddrMode AM;
- if (N->getOpcode() == ISD::ADD) {
- AM.HasBaseReg = true;
- ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (Offset)
- // [reg +/- imm]
- AM.BaseOffs = Offset->getSExtValue();
- else
- // [reg +/- reg]
- AM.Scale = 1;
- } else if (N->getOpcode() == ISD::SUB) {
- AM.HasBaseReg = true;
- ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (Offset)
- // [reg +/- imm]
- AM.BaseOffs = -Offset->getSExtValue();
- else
- // [reg +/- reg]
- AM.Scale = 1;
- } else
- return false;
-
- return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
- VT.getTypeForEVT(*DAG.getContext()), AS);
-}
-
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
bool &IsLoad, bool &IsMasked, SDValue &Ptr,
const TargetLowering &TLI) {
@@ -14463,16 +14939,13 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// Therefore, we have:
// t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
- ConstantSDNode *CN =
- cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
- int X0, X1, Y0, Y1;
+ auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
const APInt &Offset0 = CN->getAPIntValue();
- APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
-
- X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
- Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
- X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
- Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
+ const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
+ int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
+ int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
+ int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
+ int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
@@ -14664,8 +15137,8 @@ SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}
-static inline int numVectorEltsOrZero(EVT T) {
- return T.isVector() ? T.getVectorNumElements() : 0;
+static inline ElementCount numVectorEltsOrZero(EVT T) {
+ return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
}
bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
@@ -14733,6 +15206,24 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
EVT STMemType = ST->getMemoryVT();
EVT STType = ST->getValue().getValueType();
+ // There are two cases to consider here:
+ // 1. The store is fixed width and the load is scalable. In this case we
+ // don't know at compile time if the store completely envelops the load
+ // so we abandon the optimisation.
+ // 2. The store is scalable and the load is fixed width. We could
+ // potentially support a limited number of cases here, but there has been
+ // no cost-benefit analysis to prove it's worth it.
+ bool LdStScalable = LDMemType.isScalableVector();
+ if (LdStScalable != STMemType.isScalableVector())
+ return SDValue();
+
+ // If we are dealing with scalable vectors on a big endian platform the
+ // calculation of offsets below becomes trickier, since we do not know at
+ // compile time the absolute size of the vector. Until we've done more
+ // analysis on big-endian platforms it seems better to bail out for now.
+ if (LdStScalable && DAG.getDataLayout().isBigEndian())
+ return SDValue();
+
BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
int64_t Offset;
@@ -14744,13 +15235,21 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
// the stored value). With Offset=n (for n > 0) the loaded value starts at the
// n:th least significant byte of the stored value.
if (DAG.getDataLayout().isBigEndian())
- Offset = ((int64_t)STMemType.getStoreSizeInBits() -
- (int64_t)LDMemType.getStoreSizeInBits()) / 8 - Offset;
+ Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
+ (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
+ 8 -
+ Offset;
// Check that the stored value covers all bits that are loaded.
- bool STCoversLD =
- (Offset >= 0) &&
- (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
+ bool STCoversLD;
+
+ TypeSize LdMemSize = LDMemType.getSizeInBits();
+ TypeSize StMemSize = STMemType.getSizeInBits();
+ if (LdStScalable)
+ STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
+ else
+ STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
+ StMemSize.getFixedSize());
auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
if (LD->isIndexed()) {
@@ -14771,15 +15270,15 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
// Memory as copy space (potentially masked).
if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
// Simple case: Direct non-truncating forwarding
- if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
+ if (LDType.getSizeInBits() == LdMemSize)
return ReplaceLd(LD, ST->getValue(), Chain);
// Can we model the truncate and extension with an and mask?
if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
!LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
// Mask to size of LDMemType
auto Mask =
- DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
- STMemType.getSizeInBits()),
+ DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
+ StMemSize.getFixedSize()),
SDLoc(ST), STType);
auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
return ReplaceLd(LD, Val, Chain);
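For the masking path above, a concrete hypothetical instance helps: forwarding an i32 store to a non-sign-extending i16 load at offset 0 masks the stored value with its low 16 bits. A minimal sketch of the APInt the code builds (example widths only):

#include "llvm/ADT/APInt.h"

// Illustrative only: STType = i32, LDMemType = i16.
// APInt::getLowBitsSet(32, 16) == 0x0000FFFF, the AND mask applied to the
// stored value before it is forwarded to the load.
llvm::APInt exampleForwardingMask() {
  return llvm::APInt::getLowBitsSet(/*numBits=*/32, /*loBitsSet=*/16);
}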
@@ -15602,8 +16101,6 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
// Figure out the offset for the store and the alignment of the access.
unsigned StOffset;
- unsigned NewAlign = St->getAlignment();
-
if (DAG.getDataLayout().isLittleEndian())
StOffset = ByteShift;
else
@@ -15612,8 +16109,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
SDValue Ptr = St->getBasePtr();
if (StOffset) {
SDLoc DL(IVal);
- Ptr = DAG.getMemBasePlusOffset(Ptr, StOffset, DL);
- NewAlign = MinAlign(NewAlign, StOffset);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
}
// Truncate down to the new size.
@@ -15622,7 +16118,8 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
++OpsNarrowed;
return DAG
.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
- St->getPointerInfo().getWithOffset(StOffset), NewAlign);
+ St->getPointerInfo().getWithOffset(StOffset),
+ St->getOriginalAlign());
}
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
@@ -15726,7 +16223,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
return SDValue();
- SDValue NewPtr = DAG.getMemBasePlusOffset(Ptr, PtrOff, SDLoc(LD));
+ SDValue NewPtr =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
SDValue NewLD =
DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
@@ -16034,9 +16532,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
// make sure we use trunc store if it's necessary to be legal.
SDValue NewStore;
if (!UseTrunc) {
- NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(),
- FirstInChain->getAlignment());
+ NewStore =
+ DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), FirstInChain->getAlign());
} else { // Must be realized as a trunc store
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
@@ -16048,8 +16546,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
NewStore = DAG.getTruncStore(
NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
- FirstInChain->getAlignment(),
- FirstInChain->getMemOperand()->getFlags());
+ FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
}
// Replace all merged stores with the new store.
@@ -16064,23 +16561,19 @@ void DAGCombiner::getStoreMergeCandidates(
StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
SDNode *&RootNode) {
// This holds the base pointer, index, and the offset in bytes from the base
- // pointer.
+ // pointer. We must have a base and an offset. Do not handle stores to undef
+ // base pointers.
BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
- EVT MemVT = St->getMemoryVT();
-
- SDValue Val = peekThroughBitcasts(St->getValue());
- // We must have a base and an offset.
- if (!BasePtr.getBase().getNode())
- return;
-
- // Do not handle stores to undef base pointers.
- if (BasePtr.getBase().isUndef())
+ if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
return;
+ SDValue Val = peekThroughBitcasts(St->getValue());
StoreSource StoreSrc = getStoreSource(Val);
assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
- BaseIndexOffset LBasePtr;
+
// Match on loadbaseptr if relevant.
+ EVT MemVT = St->getMemoryVT();
+ BaseIndexOffset LBasePtr;
EVT LoadVT;
if (StoreSrc == StoreSource::Load) {
auto *Ld = cast<LoadSDNode>(Val);
@@ -16101,7 +16594,7 @@ void DAGCombiner::getStoreMergeCandidates(
int64_t &Offset) -> bool {
// The memory operands must not be volatile/indexed/atomic.
// TODO: May be able to relax for unordered atomics (see D66309)
- if (!Other->isSimple() || Other->isIndexed())
+ if (!Other->isSimple() || Other->isIndexed())
return false;
// Don't mix temporal stores with non-temporal stores.
if (St->isNonTemporal() != Other->isNonTemporal())
@@ -16110,37 +16603,38 @@ void DAGCombiner::getStoreMergeCandidates(
// Allow merging constants of different types as integers.
bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
: Other->getMemoryVT() != MemVT;
- if (StoreSrc == StoreSource::Load) {
+ switch (StoreSrc) {
+ case StoreSource::Load: {
if (NoTypeMatch)
return false;
- // The Load's Base Ptr must also match
- if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
- BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
- if (LoadVT != OtherLd->getMemoryVT())
- return false;
- // Loads must only have one use.
- if (!OtherLd->hasNUsesOfValue(1, 0))
- return false;
- // The memory operands must not be volatile/indexed/atomic.
- // TODO: May be able to relax for unordered atomics (see D66309)
- if (!OtherLd->isSimple() ||
- OtherLd->isIndexed())
- return false;
- // Don't mix temporal loads with non-temporal loads.
- if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
- return false;
- if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
- return false;
- } else
+ // The Load's Base Ptr must also match.
+ auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
+ if (!OtherLd)
+ return false;
+ BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
+ if (LoadVT != OtherLd->getMemoryVT())
+ return false;
+ // Loads must only have one use.
+ if (!OtherLd->hasNUsesOfValue(1, 0))
+ return false;
+ // The memory operands must not be volatile/indexed/atomic.
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (!OtherLd->isSimple() || OtherLd->isIndexed())
return false;
+ // Don't mix temporal loads with non-temporal loads.
+ if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
+ return false;
+ if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
+ return false;
+ break;
}
- if (StoreSrc == StoreSource::Constant) {
+ case StoreSource::Constant:
if (NoTypeMatch)
return false;
if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
return false;
- }
- if (StoreSrc == StoreSource::Extract) {
+ break;
+ case StoreSource::Extract:
// Do not merge truncated stores here.
if (Other->isTruncatingStore())
return false;
@@ -16149,6 +16643,9 @@ void DAGCombiner::getStoreMergeCandidates(
if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
+ break;
+ default:
+ llvm_unreachable("Unhandled store source for merging");
}
Ptr = BaseIndexOffset::match(Other, DAG);
return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
@@ -16159,11 +16656,22 @@ void DAGCombiner::getStoreMergeCandidates(
auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
SDNode *RootNode) -> bool {
auto RootCount = StoreRootCountMap.find(StoreNode);
- if (RootCount != StoreRootCountMap.end() &&
- RootCount->second.first == RootNode &&
- RootCount->second.second > StoreMergeDependenceLimit)
- return true;
- return false;
+ return RootCount != StoreRootCountMap.end() &&
+ RootCount->second.first == RootNode &&
+ RootCount->second.second > StoreMergeDependenceLimit;
+ };
+
+ auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
+ // This must be a chain use.
+ if (UseIter.getOperandNo() != 0)
+ return;
+ if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
+ BaseIndexOffset Ptr;
+ int64_t PtrDiff;
+ if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
+ !OverLimitInDependenceCheck(OtherStore, RootNode))
+ StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
+ }
};
// We are looking for a root node which is an ancestor to all mergeable
@@ -16185,31 +16693,21 @@ void DAGCombiner::getStoreMergeCandidates(
RootNode = St->getChain().getNode();
unsigned NumNodesExplored = 0;
- if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
+ const unsigned MaxSearchNodes = 1024;
+ if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
RootNode = Ldn->getChain().getNode();
for (auto I = RootNode->use_begin(), E = RootNode->use_end();
- I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
- if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
+ I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
+ if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
- if (I2.getOperandNo() == 0)
- if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
- BaseIndexOffset Ptr;
- int64_t PtrDiff;
- if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
- !OverLimitInDependenceCheck(OtherST, RootNode))
- StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
- }
- } else
+ TryToAddCandidate(I2);
+ }
+ }
+ } else {
for (auto I = RootNode->use_begin(), E = RootNode->use_end();
- I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
- if (I.getOperandNo() == 0)
- if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
- BaseIndexOffset Ptr;
- int64_t PtrDiff;
- if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
- !OverLimitInDependenceCheck(OtherST, RootNode))
- StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
- }
+ I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
+ TryToAddCandidate(I);
+ }
}
// We need to check that merging these stores does not cause a loop in
@@ -16579,7 +17077,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
}
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ Align FirstStoreAlign = FirstInChain->getAlign();
LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
// Scan the memory operations on the chain and find the first
@@ -16674,7 +17172,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
// the NumElem refers to array/index size.
unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
NumElem = std::min(LastLegalType, NumElem);
- unsigned FirstLoadAlign = FirstLoad->getAlignment();
+ Align FirstLoadAlign = FirstLoad->getAlign();
if (NumElem < 2) {
// We know that candidate stores are in order and of correct
@@ -16686,8 +17184,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
// can here.
unsigned NumSkip = 1;
while ((NumSkip < LoadNodes.size()) &&
- (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
- (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
+ (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
NumSkip++;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
@@ -16760,11 +17258,10 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
FirstLoad->getChain(), FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), JointMemOpVT,
FirstLoadAlign, LdMMOFlags);
- NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
- FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), JointMemOpVT,
- FirstInChain->getAlignment(),
- FirstInChain->getMemOperand()->getFlags());
+ NewStore = DAG.getTruncStore(
+ NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), JointMemOpVT,
+ FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
}
// Transfer chain users from old loads to the new load.
@@ -16966,17 +17463,15 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
- unsigned Alignment = ST->getAlignment();
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
- ST->getAlignment(), MMOFlags, AAInfo);
- Ptr = DAG.getMemBasePlusOffset(Ptr, 4, DL);
- Alignment = MinAlign(Alignment, 4U);
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
- Alignment, MMOFlags, AAInfo);
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
St0, St1);
}
@@ -17037,7 +17532,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return NewST;
// Try transforming several stores into STORE (BSWAP).
- if (SDValue Store = MatchStoreCombine(ST))
+ if (SDValue Store = mergeTruncStores(ST))
return Store;
if (ST->isUnindexed()) {
@@ -17110,11 +17605,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
!ST1->getBasePtr().isUndef() &&
// BaseIndexOffset and the code below requires knowing the size
// of a vector, so bail out if MemoryVT is scalable.
+ !ST->getMemoryVT().isScalableVector() &&
!ST1->getMemoryVT().isScalableVector()) {
const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
- unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
- unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
+ unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
+ unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
// If this is a store whose preceding store is to a subset of the current
// location and no other node is chained to that store, we can
// effectively drop the store. Do not remove stores to undef as they may
@@ -17185,8 +17681,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
// We walk up the chains to find stores.
SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
while (!Chains.empty()) {
- SDValue Chain = Chains.back();
- Chains.pop_back();
+ SDValue Chain = Chains.pop_back_val();
if (!Chain.hasOneUse())
continue;
switch (Chain.getOpcode()) {
@@ -17206,11 +17701,16 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
// TODO: Can relax for unordered atomics (see D66309)
if (!ST->isSimple() || ST->isIndexed())
continue;
+ const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
+ // The bounds of a scalable store are not known until runtime, so this
+ // store cannot be elided.
+ if (StoreSize.isScalable())
+ continue;
const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
// If we store purely within object bounds just before its lifetime ends,
// we can remove the store.
if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
- ST->getMemoryVT().getStoreSizeInBits())) {
+ StoreSize.getFixedSize() * 8)) {
LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
dbgs() << "\nwithin LIFETIME_END of : ";
LifetimeEndBase.dump(); dbgs() << "\n");
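Once both sizes are fixed, the bounds test that justifies dropping the store reduces to simple interval containment; a hedged byte-granular sketch (offsets relative to the LIFETIME_END'd object):

// Illustrative only: a store of StoreBytes at StoreOffset can be elided when
// it lies entirely inside the ObjBytes-sized object whose lifetime ends here.
static bool storeWithinDeadObject(int64_t StoreOffset, int64_t StoreBytes,
                                  int64_t ObjBytes) {
  return StoreOffset >= 0 && StoreOffset + StoreBytes <= ObjBytes;
}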
@@ -17309,7 +17809,6 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
return SDValue();
// Start to split store.
- unsigned Alignment = ST->getAlignment();
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
@@ -17322,13 +17821,12 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
SDValue Ptr = ST->getBasePtr();
// Lower value store.
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
- ST->getAlignment(), MMOFlags, AAInfo);
- Ptr = DAG.getMemBasePlusOffset(Ptr, HalfValBitSize / 8, DL);
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
// Higher value store.
- SDValue St1 =
- DAG.getStore(St0, DL, Hi, Ptr,
- ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
- Alignment / 2, MMOFlags, AAInfo);
+ SDValue St1 = DAG.getStore(
+ St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
return St1;
}
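A quick scalar analogue of the split performed above, assuming a little-endian i64 whose two i32 halves come from independent values (names and layout are illustrative):

#include <cstdint>
#include <cstring>

static void splitStore64(uint8_t *P, uint32_t Lo, uint32_t Hi) {
  std::memcpy(P, &Lo, 4);     // lower half at offset 0
  std::memcpy(P + 4, &Hi, 4); // upper half at offset HalfValBitSize / 8 == 4
}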
@@ -17566,6 +18064,13 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
EVT ResultVT = EVE->getValueType(0);
EVT VecEltVT = InVecVT.getVectorElementType();
+
+ // If the vector element type is not a multiple of a byte then we are unable
+ // to correctly compute an address to load only the extracted element as a
+ // scalar.
+ if (!VecEltVT.isByteSized())
+ return SDValue();
+
Align Alignment = OriginalLoad->getAlign();
Align NewAlign = DAG.getDataLayout().getABITypeAlign(
VecEltVT.getTypeForEVT(*DAG.getContext()));
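The byte-sized check above exists because the narrowed load's address is computed in whole bytes; a small illustrative helper shows why sub-byte elements cannot be addressed this way:

// Illustrative only: extracting element 3 of a v8i16 loads from Base + 3 * 2,
// but a v8i1 element would need a sub-byte offset, which this cannot express.
static unsigned extractedEltByteOffset(unsigned EltIdx, unsigned EltBits) {
  return EltIdx * (EltBits / 8); // only meaningful when EltBits % 8 == 0
}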
@@ -18201,20 +18706,24 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
// operands will all be based off of VecIn1, even those in VecIn2.
unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
+ uint64_t VTSize = VT.getFixedSizeInBits();
+ uint64_t InVT1Size = InVT1.getFixedSizeInBits();
+ uint64_t InVT2Size = InVT2.getFixedSizeInBits();
+
// We can't generate a shuffle node with mismatched input and output types.
// Try to make the types match the type of the output.
if (InVT1 != VT || InVT2 != VT) {
- if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
+ if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
// If the output vector length is a multiple of both input lengths,
// we can concatenate them and pad the rest with undefs.
- unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
+ unsigned NumConcats = VTSize / InVT1Size;
assert(NumConcats >= 2 && "Concat needs at least two inputs!");
SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
ConcatOps[0] = VecIn1;
ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
VecIn2 = SDValue();
- } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
+ } else if (InVT1Size == VTSize * 2) {
if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
return SDValue();
@@ -18227,7 +18736,7 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
// Since we now have shorter input vectors, adjust the offset of the
// second vector's start.
Vec2Offset = NumElems;
- } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
+ } else if (InVT2Size <= InVT1Size) {
// VecIn1 is wider than the output, and we have another, possibly
// smaller input. Pad the smaller input with undefs, shuffle at the
// input vector width, and extract the output.
@@ -18252,8 +18761,7 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
// when we start sorting the vectors by type.
return SDValue();
}
- } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
- InVT1.getSizeInBits() == VT.getSizeInBits()) {
+ } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
ConcatOps[0] = VecIn2;
VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
@@ -18444,8 +18952,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
// Have we seen this input vector before?
// The vectors are expected to be tiny (usually 1 or 2 elements), so using
// a map back from SDValues to numbers isn't worth it.
- unsigned Idx = std::distance(
- VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
+ unsigned Idx = std::distance(VecIn.begin(), find(VecIn, ExtractedFromVec));
if (Idx == VecIn.size())
VecIn.push_back(ExtractedFromVec);
@@ -18795,6 +19302,11 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
EVT OpVT = N->getOperand(0).getValueType();
+
+ // We currently can't generate an appropriate shuffle for a scalable vector.
+ if (VT.isScalableVector())
+ return SDValue();
+
int NumElts = VT.getVectorNumElements();
int NumOpElts = OpVT.getVectorNumElements();
@@ -18898,7 +19410,7 @@ static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
// check the other type in the cast to make sure this is really legal.
EVT VT = N->getValueType(0);
EVT SrcEltVT = SrcVT.getVectorElementType();
- unsigned NumElts = SrcVT.getVectorElementCount().Min * N->getNumOperands();
+ ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
switch (CastOpcode) {
@@ -18935,9 +19447,8 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return DAG.getUNDEF(VT);
// Optimize concat_vectors where all but the first of the vectors are undef.
- if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
- return Op.isUndef();
- })) {
+ if (all_of(drop_begin(N->ops()),
+ [](const SDValue &Op) { return Op.isUndef(); })) {
SDValue In = N->getOperand(0);
assert(In.getValueType().isVector() && "Must concat vectors");
@@ -19055,11 +19566,14 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return V;
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
- // nodes often generate nop CONCAT_VECTOR nodes.
- // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
- // place the incoming vectors at the exact same location.
+ // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
+ // operands and look for CONCAT operations that place the incoming vectors
+ // at the exact same location.
+ //
+ // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
SDValue SingleSource = SDValue();
- unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
+ unsigned PartNumElem =
+ N->getOperand(0).getValueType().getVectorMinNumElements();
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDValue Op = N->getOperand(i);
@@ -19107,15 +19621,16 @@ static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
auto *IndexC = dyn_cast<ConstantSDNode>(Index);
if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
V.getOperand(0).getValueType() == SubVT &&
- (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) {
- uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements();
+ (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
+ uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
return V.getOperand(SubIdx);
}
return SDValue();
}
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ bool LegalOperations) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue BinOp = Extract->getOperand(0);
unsigned BinOpcode = BinOp.getOpcode();
@@ -19129,7 +19644,7 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
SDValue Index = Extract->getOperand(1);
EVT SubVT = Extract->getValueType(0);
- if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT))
+ if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
return SDValue();
SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
@@ -19150,11 +19665,12 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
/// If we are extracting a subvector produced by a wide binary operator try
/// to use a narrow binary operator and/or avoid concatenation and extraction.
-static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
+static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
+ bool LegalOperations) {
// TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
// some of these bailouts with other transforms.
- if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
+ if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
return V;
// The extract index must be a constant, so we can map it to a concat operand.
@@ -19181,7 +19697,10 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// The binop must be a vector type, so we can extract some fraction of it.
EVT WideBVT = BinOp.getValueType();
- if (!WideBVT.isVector())
+ // The optimisations below currently assume we are dealing with fixed length
+ // vectors. It is possible to add support for scalable vectors, but at the
+ // moment we've done no analysis to prove whether they are profitable or not.
+ if (!WideBVT.isFixedLengthVector())
return SDValue();
EVT VT = Extract->getValueType(0);
@@ -19296,19 +19815,15 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
return SDValue();
unsigned Index = ExtIdx->getZExtValue();
- unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumElts = VT.getVectorMinNumElements();
- // If the index is a multiple of the extract element count, we can offset the
- // address by the store size multiplied by the subvector index. Otherwise if
- // the scalar type is byte sized, we can just use the index multiplied by
- // the element size in bytes as the offset.
- unsigned Offset;
- if (Index % NumElts == 0)
- Offset = (Index / NumElts) * VT.getStoreSize();
- else if (VT.getScalarType().isByteSized())
- Offset = Index * VT.getScalarType().getStoreSize();
- else
- return SDValue();
+ // The definition of EXTRACT_SUBVECTOR states that the index must be a
+ // multiple of the minimum number of elements in the result type.
+ assert(Index % NumElts == 0 && "The extract subvector index is not a "
+ "multiple of the result's element count");
+
+ // It's fine to use TypeSize here as we know the offset will not be negative.
+ TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
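The new offset rule above is easiest to see with fixed-width numbers; a hedged example, assuming a v4i32 subvector (16-byte store size) extracted at index 4 from a wider load:

// Illustrative only: Offset = StoreSize(VT) * (Index / NumElts).
// For VT = v4i32 (16 bytes), Index = 4, NumElts = 4 -> offset 16 bytes.
static unsigned narrowedLoadByteOffset(unsigned Index, unsigned NumElts,
                                       unsigned SubvecStoreBytes) {
  return SubvecStoreBytes * (Index / NumElts);
}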
@@ -19317,13 +19832,21 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
// The narrow load will be offset from the base address of the old load if
// we are extracting from something besides index 0 (little-endian).
SDLoc DL(Extract);
- SDValue BaseAddr = Ld->getBasePtr();
// TODO: Use "BaseIndexOffset" to make this more effective.
- SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
+ SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
+
+ uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
MachineFunction &MF = DAG.getMachineFunction();
- MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
- VT.getStoreSize());
+ MachineMemOperand *MMO;
+ if (Offset.isScalable()) {
+ MachinePointerInfo MPI =
+ MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
+ MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
+ } else
+ MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
+ StoreSize);
+
SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
return NewLd;
@@ -19376,8 +19899,9 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
}
if ((DestNumElts % SrcNumElts) == 0) {
unsigned DestSrcRatio = DestNumElts / SrcNumElts;
- if ((NVT.getVectorMinNumElements() % DestSrcRatio) == 0) {
- ElementCount NewExtEC = NVT.getVectorElementCount() / DestSrcRatio;
+ if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
+ ElementCount NewExtEC =
+ NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
EVT ScalarVT = SrcVT.getScalarType();
if ((ExtIdx % DestSrcRatio) == 0) {
SDLoc DL(N);
@@ -19391,7 +19915,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
V.getOperand(0), NewIndex);
return DAG.getBitcast(NVT, NewExtract);
}
- if (NewExtEC == 1 &&
+ if (NewExtEC.isScalar() &&
TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
SDValue NewExtract =
@@ -19496,7 +20020,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
N->getOperand(1));
}
- if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
+ if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
return NarrowBOp;
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
@@ -20274,52 +20798,52 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
}
- // Canonicalize shuffles according to rules:
- // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
- // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
- // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
- if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
- N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
- TLI.isTypeLegal(VT)) {
- // The incoming shuffle must be of the same type as the result of the
- // current shuffle.
- assert(N1->getOperand(0).getValueType() == VT &&
- "Shuffle types don't match");
-
- SDValue SV0 = N1->getOperand(0);
- SDValue SV1 = N1->getOperand(1);
- bool HasSameOp0 = N0 == SV0;
- bool IsSV1Undef = SV1.isUndef();
- if (HasSameOp0 || IsSV1Undef || N0 == SV1)
- // Commute the operands of this shuffle so that next rule
- // will trigger.
+ if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
+ // Canonicalize shuffles according to rules:
+ // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
+ // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
+ // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
+ if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(N1->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ SDValue SV0 = N1->getOperand(0);
+ SDValue SV1 = N1->getOperand(1);
+ bool HasSameOp0 = N0 == SV0;
+ bool IsSV1Undef = SV1.isUndef();
+ if (HasSameOp0 || IsSV1Undef || N0 == SV1)
+ // Commute the operands of this shuffle so merging below will trigger.
+ return DAG.getCommutedVectorShuffle(*SVN);
+ }
+
+ // Canonicalize splat shuffles to the RHS to improve merging below.
+ // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ cast<ShuffleVectorSDNode>(N0)->isSplat() &&
+ !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
return DAG.getCommutedVectorShuffle(*SVN);
+ }
}
- // Try to fold according to rules:
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
- // Don't try to fold shuffles with illegal type.
- // Only fold if this shuffle is the only user of the other shuffle.
- if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
- Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
- ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
-
+ // Compute the combined shuffle mask for a shuffle with SV0 as the first
+ // operand, and SV1 as the second operand.
+ // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask).
+ auto MergeInnerShuffle = [NumElts](ShuffleVectorSDNode *SVN,
+ ShuffleVectorSDNode *OtherSVN, SDValue N1,
+ SDValue &SV0, SDValue &SV1,
+ SmallVectorImpl<int> &Mask) -> bool {
// Don't try to fold splats; they're likely to simplify somehow, or they
// might be free.
- if (OtherSV->isSplat())
- return SDValue();
+ if (OtherSVN->isSplat())
+ return false;
- // The incoming shuffle must be of the same type as the result of the
- // current shuffle.
- assert(OtherSV->getOperand(0).getValueType() == VT &&
- "Shuffle types don't match");
+ SV0 = SV1 = SDValue();
+ Mask.clear();
- SDValue SV0, SV1;
- SmallVector<int, 4> Mask;
- // Compute the combined shuffle mask for a shuffle with SV0 as the first
- // operand, and SV1 as the second operand.
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
if (Idx < 0) {
@@ -20332,15 +20856,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (Idx < (int)NumElts) {
// This shuffle index refers to the inner shuffle N0. Lookup the inner
// shuffle mask to identify which vector is actually referenced.
- Idx = OtherSV->getMaskElt(Idx);
+ Idx = OtherSVN->getMaskElt(Idx);
if (Idx < 0) {
// Propagate Undef.
Mask.push_back(Idx);
continue;
}
-
- CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
- : OtherSV->getOperand(1);
+ CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
+ : OtherSVN->getOperand(1);
} else {
// This shuffle index references an element within N1.
CurrentVec = N1;
@@ -20362,38 +20885,82 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
Mask.push_back(Idx);
continue;
}
+ if (!SV1.getNode() || SV1 == CurrentVec) {
+ // Ok. CurrentVec is the right hand side.
+ // Update the mask accordingly.
+ SV1 = CurrentVec;
+ Mask.push_back(Idx + NumElts);
+ continue;
+ }
- // Bail out if we cannot convert the shuffle pair into a single shuffle.
- if (SV1.getNode() && SV1 != CurrentVec)
- return SDValue();
+ // Last chance - see if the vector is another shuffle and if it
+ // uses one of the existing candidate shuffle ops.
+ if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
+ int InnerIdx = CurrentSVN->getMaskElt(Idx);
+ if (InnerIdx < 0) {
+ Mask.push_back(-1);
+ continue;
+ }
+ SDValue InnerVec = (InnerIdx < (int)NumElts)
+ ? CurrentSVN->getOperand(0)
+ : CurrentSVN->getOperand(1);
+ if (InnerVec.isUndef()) {
+ Mask.push_back(-1);
+ continue;
+ }
+ InnerIdx %= NumElts;
+ if (InnerVec == SV0) {
+ Mask.push_back(InnerIdx);
+ continue;
+ }
+ if (InnerVec == SV1) {
+ Mask.push_back(InnerIdx + NumElts);
+ continue;
+ }
+ }
- // Ok. CurrentVec is the right hand side.
- // Update the mask accordingly.
- SV1 = CurrentVec;
- Mask.push_back(Idx + NumElts);
+ // Bail out if we cannot convert the shuffle pair into a single shuffle.
+ return false;
}
+ return true;
+ };
- // Check if all indices in Mask are Undef. In case, propagate Undef.
- bool isUndefMask = true;
- for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
- isUndefMask &= Mask[i] < 0;
+ // Try to fold according to rules:
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
+ // Don't try to fold shuffles with illegal type.
+ // Only fold if this shuffle is the only user of the other shuffle.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
+ Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
+ ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
- if (isUndefMask)
- return DAG.getUNDEF(VT);
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(OtherSV->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ SDValue SV0, SV1;
+ SmallVector<int, 4> Mask;
+ if (MergeInnerShuffle(SVN, OtherSV, N1, SV0, SV1, Mask)) {
+ // Check if all indices in Mask are Undef. In case, propagate Undef.
+ if (llvm::all_of(Mask, [](int M) { return M < 0; }))
+ return DAG.getUNDEF(VT);
- if (!SV0.getNode())
- SV0 = DAG.getUNDEF(VT);
- if (!SV1.getNode())
- SV1 = DAG.getUNDEF(VT);
+ if (!SV0.getNode())
+ SV0 = DAG.getUNDEF(VT);
+ if (!SV1.getNode())
+ SV1 = DAG.getUNDEF(VT);
- // Avoid introducing shuffles with illegal mask.
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
- return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
+ // Avoid introducing shuffles with illegal mask.
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
+ return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
+ }
}
if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
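MergeInnerShuffle above is essentially mask composition. A minimal fixed-width sketch for the common case where the outer mask only references the inner shuffle (no undefs; values are illustrative):

#include <vector>

// shuffle(shuffle(A, B, M0), undef, M1) -> shuffle(A, B, Merged).
// Example: M0 = {0,4,1,5}, M1 = {2,3,0,1} -> Merged = {1,5,0,4}.
static std::vector<int> mergeShuffleMasks(const std::vector<int> &M0,
                                          const std::vector<int> &M1) {
  std::vector<int> Merged;
  for (int OuterIdx : M1)
    Merged.push_back(M0[OuterIdx]); // look through the inner shuffle
  return Merged;
}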
@@ -20478,8 +21045,8 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N1.getOperand(0).getOperand(1) == N2 &&
- N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
- VT.getVectorNumElements() &&
+ N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
+ VT.getVectorElementCount() &&
N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
VT.getSizeInBits()) {
return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
@@ -20496,7 +21063,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
EVT CN1VT = CN1.getValueType();
if (CN0VT.isVector() && CN1VT.isVector() &&
CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
- CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
+ CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
CN0.getValueType(), CN0, CN1, N2);
return DAG.getBitcast(VT, NewINSERT);
@@ -20535,7 +21102,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
SDLoc DL(N);
SDValue NewIdx;
LLVMContext &Ctx = *DAG.getContext();
- unsigned NumElts = VT.getVectorNumElements();
+ ElementCount NumElts = VT.getVectorElementCount();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
@@ -20543,8 +21110,9 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
} else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
- if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
- NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
+ if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
+ NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
+ NumElts.divideCoefficientBy(Scale));
NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
}
}
@@ -20576,8 +21144,10 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
// If the input vector is a concatenation, and the insert replaces
// one of the pieces, we can optimize into a single concat_vectors.
if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
- N0.getOperand(0).getValueType() == N1.getValueType()) {
- unsigned Factor = N1.getValueType().getVectorNumElements();
+ N0.getOperand(0).getValueType() == N1.getValueType() &&
+ N0.getOperand(0).getValueType().isScalableVector() ==
+ N1.getValueType().isScalableVector()) {
+ unsigned Factor = N1.getValueType().getVectorMinNumElements();
SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
Ops[InsIdx / Factor] = N1;
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
@@ -20621,7 +21191,7 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
unsigned Opcode = N->getOpcode();
// VECREDUCE over 1-element vector is just an extract.
- if (VT.getVectorNumElements() == 1) {
+ if (VT.getVectorElementCount().isScalar()) {
SDLoc dl(N);
SDValue Res =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
@@ -20860,7 +21430,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
SDValue Z = LHS.getOperand(2);
EVT NarrowVT = X.getValueType();
if (NarrowVT == Y.getValueType() &&
- TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
+ TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
+ LegalOperations)) {
// (binop undef, undef) may not return undef, so compute that result.
SDLoc DL(N);
SDValue VecC =
@@ -20873,11 +21444,10 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
// Make sure all but the first op are undef or constant.
auto ConcatWithConstantOrUndef = [](SDValue Concat) {
return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
- std::all_of(std::next(Concat->op_begin()), Concat->op_end(),
- [](const SDValue &Op) {
- return Op.isUndef() ||
- ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
- });
+ all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
+ return Op.isUndef() ||
+ ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
+ });
};
// The following pattern is likely to emerge with vector reduction ops. Moving
@@ -21099,7 +21669,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
// It is safe to replace the two loads if they have different alignments,
// but the new load must be the minimum (most restrictive) alignment of the
// inputs.
- unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
+ Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
if (!RLD->isInvariant())
MMOFlags &= ~MachineMemOperand::MOInvariant;
@@ -21205,6 +21775,46 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
}
+// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
+SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ bool IsFabs = N->getOpcode() == ISD::FABS;
+ bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
+
+ if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
+ return SDValue();
+
+ SDValue Int = N0.getOperand(0);
+ EVT IntVT = Int.getValueType();
+
+ // The operand to cast should be integer.
+ if (!IntVT.isInteger() || IntVT.isVector())
+ return SDValue();
+
+ // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
+ // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
+ APInt SignMask;
+ if (N0.getValueType().isVector()) {
+ // For vector, create a sign mask (0x80...) or its inverse (for fabs,
+ // 0x7f...) per element and splat it.
+ SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
+ if (IsFabs)
+ SignMask = ~SignMask;
+ SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
+ } else {
+ // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
+ SignMask = APInt::getSignMask(IntVT.getSizeInBits());
+ if (IsFabs)
+ SignMask = ~SignMask;
+ }
+ SDLoc DL(N0);
+ Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
+ DAG.getConstant(SignMask, DL, IntVT));
+ AddToWorklist(Int.getNode());
+ return DAG.getBitcast(VT, Int);
+}
+
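The sign-mask values produced above are worth spelling out for the common f32 case; a hedged scalar analogue using host integers (illustrative only, not DAG code):

#include <cstdint>
#include <cstring>

// fneg(bitcast x) -> bitcast(x ^ 0x80000000); fabs -> bitcast(x & 0x7fffffff).
static float fnegViaIntBits(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof F);
  Bits ^= UINT32_C(0x80000000); // flip only the sign bit
  std::memcpy(&F, &Bits, sizeof F);
  return F;
}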
/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
/// in it. This may be a win when the constant is not otherwise available
@@ -21486,9 +22096,8 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
EVT VT = V.getValueType();
- unsigned EltBits = VT.getScalarSizeInBits();
SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
- SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
+ SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
return LogBase2;
}
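The identity behind BuildLogBase2 checks out on any power of two; a tiny host-side analogue, assuming 32-bit elements and C++20 <bit>:

#include <bit>
#include <cstdint>

// log2(V) == (EltBits - 1) - ctlz(V) for power-of-two V.
// Example: V = 16 -> countl_zero = 27 -> 31 - 27 = 4.
static unsigned logBase2(uint32_t V) {
  return 31u - std::countl_zero(V);
}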
@@ -21666,37 +22275,21 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
Reciprocal)) {
AddToWorklist(Est.getNode());
- if (Iterations) {
+ if (Iterations)
Est = UseOneConstNR
? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
: buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
-
- if (!Reciprocal) {
- // The estimate is now completely wrong if the input was exactly 0.0 or
- // possibly a denormal. Force the answer to 0.0 for those cases.
- SDLoc DL(Op);
- EVT CCVT = getSetCCResultType(VT);
- ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
- DenormalMode DenormMode = DAG.getDenormalMode(VT);
- if (DenormMode.Input == DenormalMode::IEEE) {
- // This is specifically a check for the handling of denormal inputs,
- // not the result.
-
- // fabs(X) < SmallestNormal ? 0.0 : Est
- const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
- APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
- SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
- SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
- SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
- SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
- Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
- } else {
- // X == 0.0 ? 0.0 : Est
- SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
- SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
- Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
- }
- }
+ if (!Reciprocal) {
+ SDLoc DL(Op);
+ // Try the target specific test first.
+ SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
+
+ // The estimate is now completely wrong if the input was exactly 0.0 or
+ // possibly a denormal. Force the answer to 0.0 or value provided by
+ // target for those cases.
+ Est = DAG.getNode(
+ Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
+ Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
}
return Est;
}
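The select emitted above guards the estimate against inputs where Newton-Raphson breaks down; a hedged scalar picture of the IEEE-denormal flavour of that test:

#include <cmath>
#include <limits>

// Illustrative only: force the non-reciprocal sqrt estimate to 0.0 when the
// input is zero or denormal (fabs(X) < smallest normal), otherwise keep Est.
static float guardSqrtEstimate(float X, float Est) {
  if (std::fabs(X) < std::numeric_limits<float>::min())
    return 0.0f;
  return Est;
}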