Diffstat (limited to 'lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3271
1 file changed, 2357 insertions(+), 914 deletions(-)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ff5505c97721..49c922f560fa 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1,9 +1,8 @@
//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -112,6 +111,10 @@ static cl::opt<bool>
MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
cl::desc("DAG combiner may split indexing from loads"));
+static cl::opt<unsigned> TokenFactorInlineLimit(
+ "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
+ cl::desc("Limit the number of operands to inline for Token Factors"));
+
namespace {
class DAGCombiner {
@@ -138,6 +141,10 @@ namespace {
/// them) when they are deleted from the underlying DAG. It relies on
/// stable indices of nodes within the worklist.
DenseMap<SDNode *, unsigned> WorklistMap;
+ /// This records all nodes attempted to be added to the worklist since we
+ /// considered a new worklist entry. Since we do not add duplicate nodes
+ /// to the worklist, this is different from the tail of the worklist.
+ SmallSetVector<SDNode *, 32> PruningList;
/// Set of nodes which have been combined (at least once).
///
@@ -155,6 +162,37 @@ namespace {
AddToWorklist(Node);
}
+ // Prune potentially dangling nodes. This is called after
+ // any visit to a node, but should also be called during a visit after any
+ // failed combine which may have created a DAG node.
+ void clearAddedDanglingWorklistEntries() {
+ // Check any nodes added to the worklist to see if they are prunable.
+ while (!PruningList.empty()) {
+ auto *N = PruningList.pop_back_val();
+ if (N->use_empty())
+ recursivelyDeleteUnusedNodes(N);
+ }
+ }
+
+ SDNode *getNextWorklistEntry() {
+ // Before we do any work, remove nodes that are not in use.
+ clearAddedDanglingWorklistEntries();
+ SDNode *N = nullptr;
+ // The Worklist holds the SDNodes in order, but it may contain null
+ // entries.
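+ // (Deleted nodes are nulled out in place rather than erased, so that the
+ // indices recorded in WorklistMap stay stable.)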
+ while (!N && !Worklist.empty()) {
+ N = Worklist.pop_back_val();
+ }
+
+ if (N) {
+ bool GoodWorklistEntry = WorklistMap.erase(N);
+ (void)GoodWorklistEntry;
+ assert(GoodWorklistEntry &&
+ "Found a worklist entry without a corresponding map entry!");
+ }
+ return N;
+ }
+
/// Call the node-specific routine that folds each particular type of node.
SDValue visit(SDNode *N);
@@ -162,7 +200,7 @@ namespace {
DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), AA(AA) {
- ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
+ ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
MaximumLegalStoreInBits = 0;
for (MVT VT : MVT::all_valuetypes())
@@ -172,6 +210,11 @@ namespace {
MaximumLegalStoreInBits = VT.getSizeInBits();
}
+ void ConsiderForPruning(SDNode *N) {
+ // Mark this for potential pruning.
+ PruningList.insert(N);
+ }
+
/// Add to the worklist, making sure its instance is at the back (next to be
/// processed).
void AddToWorklist(SDNode *N) {
@@ -183,6 +226,8 @@ namespace {
if (N->getOpcode() == ISD::HANDLENODE)
return;
+ ConsiderForPruning(N);
+
if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
Worklist.push_back(N);
}
@@ -190,6 +235,7 @@ namespace {
/// Remove all instances of N from the worklist.
void removeFromWorklist(SDNode *N) {
CombinedNodes.erase(N);
+ PruningList.remove(N);
auto It = WorklistMap.find(N);
if (It == WorklistMap.end())
@@ -229,8 +275,15 @@ namespace {
/// If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
unsigned BitWidth = Op.getScalarValueSizeInBits();
- APInt Demanded = APInt::getAllOnesValue(BitWidth);
- return SimplifyDemandedBits(Op, Demanded);
+ APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
+ return SimplifyDemandedBits(Op, DemandedBits);
+ }
+
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
+ EVT VT = Op.getValueType();
+ unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
+ APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
}
/// Check the specified vector node value to see if it can be simplified or
@@ -238,12 +291,13 @@ namespace {
/// elements. If so, return true.
bool SimplifyDemandedVectorElts(SDValue Op) {
unsigned NumElts = Op.getValueType().getVectorNumElements();
- APInt Demanded = APInt::getAllOnesValue(NumElts);
- return SimplifyDemandedVectorElts(Op, Demanded);
+ APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ return SimplifyDemandedVectorElts(Op, DemandedElts);
}
- bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
- bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts);
+ bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
bool AssumeSingleUse = false);
bool CombineToPreIndexedLoadStore(SDNode *N);
@@ -291,15 +345,16 @@ namespace {
SDValue visitTokenFactor(SDNode *N);
SDValue visitMERGE_VALUES(SDNode *N);
SDValue visitADD(SDNode *N);
- SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
+ SDValue visitADDLike(SDNode *N);
+ SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitSUB(SDNode *N);
SDValue visitADDSAT(SDNode *N);
SDValue visitSUBSAT(SDNode *N);
SDValue visitADDC(SDNode *N);
- SDValue visitUADDO(SDNode *N);
+ SDValue visitADDO(SDNode *N);
SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitSUBC(SDNode *N);
- SDValue visitUSUBO(SDNode *N);
+ SDValue visitSUBO(SDNode *N);
SDValue visitADDE(SDNode *N);
SDValue visitADDCARRY(SDNode *N);
SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
@@ -316,8 +371,7 @@ namespace {
SDValue visitMULHS(SDNode *N);
SDValue visitSMUL_LOHI(SDNode *N);
SDValue visitUMUL_LOHI(SDNode *N);
- SDValue visitSMULO(SDNode *N);
- SDValue visitUMULO(SDNode *N);
+ SDValue visitMULO(SDNode *N);
SDValue visitIMINMAX(SDNode *N);
SDValue visitAND(SDNode *N);
SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
@@ -386,6 +440,7 @@ namespace {
SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
SDValue visitSTORE(SDNode *N);
+ SDValue visitLIFETIME_END(SDNode *N);
SDValue visitINSERT_VECTOR_ELT(SDNode *N);
SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
SDValue visitBUILD_VECTOR(SDNode *N);
@@ -400,13 +455,19 @@ namespace {
SDValue visitMSCATTER(SDNode *N);
SDValue visitFP_TO_FP16(SDNode *N);
SDValue visitFP16_TO_FP(SDNode *N);
+ SDValue visitVECREDUCE(SDNode *N);
SDValue visitFADDForFMACombine(SDNode *N);
SDValue visitFSUBForFMACombine(SDNode *N);
SDValue visitFMULForFMADistributiveCombine(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
- SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
+ bool reassociationCanBreakAddressingModePattern(unsigned Opc,
+ const SDLoc &DL, SDValue N0,
+ SDValue N1);
+ SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
+ SDValue N1);
+ SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1, SDNodeFlags Flags);
SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
@@ -466,6 +527,7 @@ namespace {
const SDLoc &DL);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
SDValue MatchLoadCombine(SDNode *N);
+ SDValue MatchStoreCombine(StoreSDNode *N);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue splitMergedValStore(StoreSDNode *ST);
@@ -475,7 +537,8 @@ namespace {
SDValue reduceBuildVecToShuffle(SDNode *N);
SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask, SDValue VecIn1,
- SDValue VecIn2, unsigned LeftIdx);
+ SDValue VecIn2, unsigned LeftIdx,
+ bool DidSplitVec);
SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
/// Walk up chain skipping non-aliasing memory nodes,
@@ -484,7 +547,7 @@ namespace {
SmallVectorImpl<SDValue> &Aliases);
/// Return true if there is any possibility that the two addresses overlap.
- bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
+ bool isAlias(SDNode *Op0, SDNode *Op1) const;
/// Walk up chain skipping non-aliasing memory nodes, looking for a better
/// chain (aliasing node.)
@@ -642,6 +705,18 @@ public:
}
};
+class WorklistInserter : public SelectionDAG::DAGUpdateListener {
+ DAGCombiner &DC;
+
+public:
+ explicit WorklistInserter(DAGCombiner &dc)
+ : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
+
+ // FIXME: Ideally we could add N to the worklist, but this causes exponential
+ // compile time costs in large DAGs, e.g. Halide.
+ void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
+};
+
} // end anonymous namespace
//===----------------------------------------------------------------------===//
@@ -697,20 +772,23 @@ void DAGCombiner::deleteAndRecombine(SDNode *N) {
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
const TargetLowering &TLI,
const TargetOptions *Options,
+ bool ForCodeSize,
unsigned Depth = 0) {
// fneg is removable even if it has multiple uses.
- if (Op.getOpcode() == ISD::FNEG) return 2;
+ if (Op.getOpcode() == ISD::FNEG)
+ return 2;
// Don't allow anything with multiple uses unless we know it is free.
EVT VT = Op.getValueType();
const SDNodeFlags Flags = Op->getFlags();
- if (!Op.hasOneUse())
- if (!(Op.getOpcode() == ISD::FP_EXTEND &&
- TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
- return 0;
+ if (!Op.hasOneUse() &&
+ !(Op.getOpcode() == ISD::FP_EXTEND &&
+ TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
+ return 0;
// Don't recurse exponentially.
- if (Depth > 6) return 0;
+ if (Depth > 6)
+ return 0;
switch (Op.getOpcode()) {
default: return false;
@@ -721,7 +799,25 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
// Don't invert constant FP values after legalization unless the target says
// the negated constant is legal.
return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
- TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
+ TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
+ ForCodeSize);
+ }
+ case ISD::BUILD_VECTOR: {
+ // Only permit BUILD_VECTOR of constants.
+ if (llvm::any_of(Op->op_values(), [&](SDValue N) {
+ return !N.isUndef() && !isa<ConstantFPSDNode>(N);
+ }))
+ return 0;
+ if (!LegalOperations)
+ return 1;
+ if (TLI.isOperationLegal(ISD::ConstantFP, VT) &&
+ TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return 1;
+ return llvm::all_of(Op->op_values(), [&](SDValue N) {
+ return N.isUndef() ||
+ TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
+ ForCodeSize);
+ });
}
case ISD::FADD:
if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
@@ -733,15 +829,14 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
- Options, Depth + 1))
+ Options, ForCodeSize, Depth + 1))
return V;
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
- Depth + 1);
+ ForCodeSize, Depth + 1);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!Options->NoSignedZerosFPMath &&
- !Flags.hasNoSignedZeros())
+ if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
return 0;
// fold (fneg (fsub A, B)) -> (fsub B, A)
@@ -751,30 +846,31 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
case ISD::FDIV:
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
- Options, Depth + 1))
+ Options, ForCodeSize, Depth + 1))
return V;
return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
- Depth + 1);
+ ForCodeSize, Depth + 1);
case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FSIN:
return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
- Depth + 1);
+ ForCodeSize, Depth + 1);
}
}
/// If isNegatibleForFree returns true, return the newly negated expression.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
- bool LegalOperations, unsigned Depth = 0) {
- const TargetOptions &Options = DAG.getTarget().Options;
+ bool LegalOperations, bool ForCodeSize,
+ unsigned Depth = 0) {
// fneg is removable even if it has multiple uses.
- if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
+ if (Op.getOpcode() == ISD::FNEG)
+ return Op.getOperand(0);
assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
-
- const SDNodeFlags Flags = Op.getNode()->getFlags();
+ const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags Flags = Op->getFlags();
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown code");
@@ -783,24 +879,41 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
V.changeSign();
return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
}
+ case ISD::BUILD_VECTOR: {
+ SmallVector<SDValue, 4> Ops;
+ for (SDValue C : Op->op_values()) {
+ if (C.isUndef()) {
+ Ops.push_back(C);
+ continue;
+ }
+ APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
+ V.changeSign();
+ Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
+ }
+ return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
+ }
case ISD::FADD:
assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
- DAG.getTargetLoweringInfo(), &Options, Depth+1))
+ DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
+ Depth + 1))
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, Depth+1),
+ LegalOperations, ForCodeSize,
+ Depth + 1),
Op.getOperand(1), Flags);
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(1), DAG,
- LegalOperations, Depth+1),
+ LegalOperations, ForCodeSize,
+ Depth + 1),
Op.getOperand(0), Flags);
case ISD::FSUB:
// fold (fneg (fsub 0, B)) -> B
- if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
+ if (ConstantFPSDNode *N0CFP =
+ isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
if (N0CFP->isZero())
return Op.getOperand(1);
@@ -812,28 +925,33 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
case ISD::FDIV:
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
- DAG.getTargetLoweringInfo(), &Options, Depth+1))
+ DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
+ Depth + 1))
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, Depth+1),
+ LegalOperations, ForCodeSize,
+ Depth + 1),
Op.getOperand(1), Flags);
// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
Op.getOperand(0),
GetNegatedExpression(Op.getOperand(1), DAG,
- LegalOperations, Depth+1), Flags);
+ LegalOperations, ForCodeSize,
+ Depth + 1), Flags);
case ISD::FP_EXTEND:
case ISD::FSIN:
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, Depth+1));
+ LegalOperations, ForCodeSize,
+ Depth + 1));
case ISD::FP_ROUND:
- return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
- GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, Depth+1),
- Op.getOperand(1));
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1),
+ Op.getOperand(1));
}
}
@@ -924,53 +1042,113 @@ static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}
-SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
- SDValue N1, SDNodeFlags Flags) {
- // Don't reassociate reductions.
- if (Flags.hasVectorReduction())
- return SDValue();
+bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
+ const SDLoc &DL,
+ SDValue N0,
+ SDValue N1) {
+ // Currently this only tries to ensure we don't undo the GEP splits done by
+ // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
+ // we check if the following transformation would be problematic:
+ // (load/store (add, (add, x, offset1), offset2)) ->
+ // (load/store (add, x, offset1+offset2)).
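+ // For example (illustrative, assuming a target whose addressing modes
+ // accept a 12-bit signed immediate offset): with offset2 == 2040 and
+ // offset1 == 2047, x[2040] is a legal access but x[2040 + 2047] is not,
+ // so merging the constants would undo the GEP split.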
- EVT VT = N0.getValueType();
- if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
- if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
- if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
- // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
- if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
- return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
- return SDValue();
- }
- if (N0.hasOneUse()) {
- // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
- // use
- SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
- if (!OpNode.getNode())
- return SDValue();
- AddToWorklist(OpNode.getNode());
- return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
- }
+ if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
+ return false;
+
+ if (N0.hasOneUse())
+ return false;
+
+ auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ auto *C2 = dyn_cast<ConstantSDNode>(N1);
+ if (!C1 || !C2)
+ return false;
+
+ const APInt &C1APIntVal = C1->getAPIntValue();
+ const APInt &C2APIntVal = C2->getAPIntValue();
+ if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
+ return false;
+
+ const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
+ if (CombinedValueIntVal.getBitWidth() > 64)
+ return false;
+ const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
+
+ for (SDNode *Node : N0->uses()) {
+ auto LoadStore = dyn_cast<MemSDNode>(Node);
+ if (LoadStore) {
+ // Is x[offset2] already not a legal addressing mode? If so then
+ // reassociating the constants breaks nothing (we test offset2 because
+ // that's the one we hope to fold into the load or store).
+ TargetLoweringBase::AddrMode AM;
+ AM.HasBaseReg = true;
+ AM.BaseOffs = C2APIntVal.getSExtValue();
+ EVT VT = LoadStore->getMemoryVT();
+ unsigned AS = LoadStore->getAddressSpace();
+ Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
+ if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+ continue;
+
+ // Would x[offset1+offset2] still be a legal addressing mode?
+ AM.BaseOffs = CombinedValue;
+ if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+ return true;
}
}
- if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
- if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
- if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
- // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
- if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
- return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+ return false;
+}
+
+// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
+// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
+SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
+ SDValue N0, SDValue N1) {
+ EVT VT = N0.getValueType();
+
+ if (N0.getOpcode() != Opc)
+ return SDValue();
+
+ // Don't reassociate reductions.
+ if (N0->getFlags().hasVectorReduction())
+ return SDValue();
+
+ if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
+ if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
+ return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ return SDValue();
+ }
+ if (N0.hasOneUse()) {
+ // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
+ // iff (op x, c1) has one use
+ SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
+ if (!OpNode.getNode())
return SDValue();
- }
- if (N1.hasOneUse()) {
- // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
- // use
- SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
- if (!OpNode.getNode())
- return SDValue();
- AddToWorklist(OpNode.getNode());
- return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
- }
+ AddToWorklist(OpNode.getNode());
+ return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
}
}
+ return SDValue();
+}
+
+// Try to reassociate commutative binops.
+SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
+ SDValue N1, SDNodeFlags Flags) {
+ assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
+ // Don't reassociate reductions.
+ if (Flags.hasVectorReduction())
+ return SDValue();
+
+ // Floating-point reassociation is not allowed without loose FP math.
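+ // (e.g. (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) can change the
+ // intermediate rounding, so it requires the reassoc and nsz flags.)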
+ if (N0.getValueType().isFloatingPoint() ||
+ N1.getValueType().isFloatingPoint())
+ if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
+ return SDValue();
+
+ if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
+ return Combined;
+ if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
+ return Combined;
return SDValue();
}
@@ -1026,10 +1204,11 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
-bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
+bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
KnownBits Known;
- if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
+ if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
return false;
// Revisit the node.
@@ -1048,12 +1227,13 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true.
-bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
+bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
+ const APInt &DemandedElts,
bool AssumeSingleUse) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
APInt KnownUndef, KnownZero;
- if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
- 0, AssumeSingleUse))
+ if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
+ TLO, 0, AssumeSingleUse))
return false;
// Revisit the node.
@@ -1383,6 +1563,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
LegalOperations = Level >= AfterLegalizeVectorOps;
LegalTypes = Level >= AfterLegalizeTypes;
+ WorklistInserter AddNodes(*this);
+
// Add all the dag nodes to the worklist.
for (SDNode &Node : DAG.allnodes())
AddToWorklist(&Node);
@@ -1392,19 +1574,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
// changes of the root.
HandleSDNode Dummy(DAG.getRoot());
- // While the worklist isn't empty, find a node and try to combine it.
- while (!WorklistMap.empty()) {
- SDNode *N;
- // The Worklist holds the SDNodes in order, but it may contain null entries.
- do {
- N = Worklist.pop_back_val();
- } while (!N);
-
- bool GoodWorklistEntry = WorklistMap.erase(N);
- (void)GoodWorklistEntry;
- assert(GoodWorklistEntry &&
- "Found a worklist entry without a corresponding map entry!");
-
+ // While we have a valid worklist entry node, try to combine it.
+ while (SDNode *N = getNextWorklistEntry()) {
// If N has no uses, it is dead. Make sure to revisit all N's operands once
// N is deleted from the DAG, since they too may now be dead or may have a
// reduced number of uses, allowing other xforms.
@@ -1493,9 +1664,11 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SSUBSAT:
case ISD::USUBSAT: return visitSUBSAT(N);
case ISD::ADDC: return visitADDC(N);
- case ISD::UADDO: return visitUADDO(N);
+ case ISD::SADDO:
+ case ISD::UADDO: return visitADDO(N);
case ISD::SUBC: return visitSUBC(N);
- case ISD::USUBO: return visitUSUBO(N);
+ case ISD::SSUBO:
+ case ISD::USUBO: return visitSUBO(N);
case ISD::ADDE: return visitADDE(N);
case ISD::ADDCARRY: return visitADDCARRY(N);
case ISD::SUBE: return visitSUBE(N);
@@ -1509,8 +1682,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::MULHS: return visitMULHS(N);
case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
- case ISD::SMULO: return visitSMULO(N);
- case ISD::UMULO: return visitUMULO(N);
+ case ISD::SMULO:
+ case ISD::UMULO: return visitMULO(N);
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
@@ -1590,8 +1763,22 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::MLOAD: return visitMLOAD(N);
case ISD::MSCATTER: return visitMSCATTER(N);
case ISD::MSTORE: return visitMSTORE(N);
+ case ISD::LIFETIME_END: return visitLIFETIME_END(N);
case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
}
return SDValue();
}
@@ -1644,7 +1831,7 @@ SDValue DAGCombiner::combine(SDNode *N) {
}
}
- // If N is a commutative binary node, try eliminate it if the commuted
+ // If N is a commutative binary node, try to eliminate it if the commuted
// version is already present in the DAG.
if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
N->getNumValues() == 1) {
@@ -1693,6 +1880,12 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
if (OptLevel == CodeGenOpt::None)
return SDValue();
+ // If the sole user is a token factor, we should make sure we have a
+ // chance to merge them together. This prevents TF chains from inhibiting
+ // optimizations.
+ if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
+ AddToWorklist(*(N->use_begin()));
+
SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
SmallPtrSet<SDNode*, 16> SeenOps;
@@ -1704,8 +1897,19 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
// Iterate through token factors. The TFs grows when new token factors are
// encountered.
for (unsigned i = 0; i < TFs.size(); ++i) {
- SDNode *TF = TFs[i];
+ // Limit the number of nodes to inline, to avoid quadratic compile times.
+ // We have to add the outstanding Token Factors to Ops; otherwise we might
+ // drop Ops from the resulting Token Factors.
+ if (Ops.size() > TokenFactorInlineLimit) {
+ for (unsigned j = i; j < TFs.size(); j++)
+ Ops.emplace_back(TFs[j], 0);
+ // Drop unprocessed Token Factors from TFs, so we do not add them to the
+ // combiner worklist later.
+ TFs.resize(i);
+ break;
+ }
+ SDNode *TF = TFs[i];
// Check each of the operands.
for (const SDValue &Op : TF->op_values()) {
switch (Op.getOpcode()) {
@@ -1719,8 +1923,6 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
// Queue up for processing.
TFs.push_back(Op.getNode());
- // Clean up in case the token factor is removed.
- AddToWorklist(Op.getNode());
Changed = true;
break;
}
@@ -1737,6 +1939,11 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
}
}
+ // Re-visit inlined Token Factors, to clean them up in case they have been
+ // removed. Skip the first Token Factor, as this is the current node.
+ for (unsigned i = 1, e = TFs.size(); i < e; i++)
+ AddToWorklist(TFs[i]);
+
// Remove nodes that are chained to another node in the list. Do so
// by walking up chains breadth-first, stopping when we've seen
// another operand. In general we must climb to the EntryNode, but we can exit
@@ -1803,6 +2010,8 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
for (const SDValue &Op : CurNode->op_values())
AddToWorklist(i, Op.getNode(), CurOpNumber);
break;
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
case ISD::CopyFromReg:
case ISD::CopyToReg:
AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
@@ -1831,9 +2040,9 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
if (SeenChains.count(Op.getNode()) == 0)
PrunedOps.push_back(Op);
}
- Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
+ Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
} else {
- Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
+ Result = DAG.getTokenFactor(SDLoc(N), Ops);
}
}
return Result;
@@ -1869,7 +2078,8 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
}
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
- assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");
+ assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
+ "Unexpected binary operator");
// Don't do this unless the old select is going away. We want to eliminate the
// binary operator, not replace a binop with a select.
@@ -1940,7 +2150,9 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
!isConstantFPBuildVectorOrConstantFP(NewCF))
return SDValue();
- return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
+ SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
+ SelectOp->setFlags(BO->getFlags());
+ return SelectOp;
}
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
@@ -1990,6 +2202,7 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
// We need a constant operand for the add/sub, and the other operand is a
// logical shift right: add (srl), C or sub C, (srl).
+ // TODO - support non-uniform vector amounts.
bool IsAdd = N->getOpcode() == ISD::ADD;
SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
@@ -2006,7 +2219,7 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
EVT VT = ShiftOp.getValueType();
SDValue ShAmt = ShiftOp.getOperand(1);
ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
- if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
+ if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
return SDValue();
// Eliminate the 'not' by adjusting the shift and add/sub constant:
@@ -2019,7 +2232,10 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
}
-SDValue DAGCombiner::visitADD(SDNode *N) {
+/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
+/// an ISD::ADD here; it could, for example, be an ISD::OR if we know that
+/// there are no common bits set in the operands).
+SDValue DAGCombiner::visitADDLike(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
@@ -2058,13 +2274,22 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return N0;
if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
+ // fold ((A-c1)+c2) -> (A+(c2-c1))
+ if (N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
+ SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(),
+ N0.getOperand(1).getNode());
+ assert(Sub && "Constant folding failed");
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
+ }
+
// fold ((c1-A)+c2) -> (c1+c2)-A
if (N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
- // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
- return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
- N0.getOperand(1));
+ SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(),
+ N0.getOperand(0).getNode());
+ assert(Add && "Constant folding failed");
+ return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
}
// add (sext i1 X), 1 -> zext (not i1 X)
@@ -2097,9 +2322,10 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return NewSel;
// reassociate add
- if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
- return RADD;
-
+ if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
+ if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
+ return RADD;
+ }
// fold ((0-A) + B) -> B-A
if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
@@ -2116,6 +2342,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
return N0.getOperand(0);
+ // fold ((A-B)+(C-A)) -> (C-B)
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
+ N0.getOperand(0) == N1.getOperand(1))
+ return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
+ N0.getOperand(1));
+
+ // fold ((A-B)+(B-C)) -> (A-C)
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
+ N0.getOperand(1) == N1.getOperand(0))
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
+ N1.getOperand(1));
+
// fold (A+(B-(A+C))) to (B-C)
if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
N0 == N1.getOperand(1).getOperand(0))
@@ -2148,31 +2386,93 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
}
+ // fold (add (umax X, C), -C) --> (usubsat X, C)
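+ // (e.g. with C == 42: (add (umax x, 42), -42) --> (usubsat x, 42))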
+ if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
+ auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
+ return (!Max && !Op) ||
+ (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
+ };
+ if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
+ /*AllowUndefs*/ true))
+ return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
+ N0.getOperand(1));
+ }
+
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ if (isOneOrOneSplat(N1)) {
+ // fold (add (xor a, -1), 1) -> (sub 0, a)
+ if (isBitwiseNot(N0))
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ N0.getOperand(0));
+
+ // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
+ if (N0.getOpcode() == ISD::ADD ||
+ N0.getOpcode() == ISD::UADDO ||
+ N0.getOpcode() == ISD::SADDO) {
+ SDValue A, Xor;
+
+ if (isBitwiseNot(N0.getOperand(0))) {
+ A = N0.getOperand(1);
+ Xor = N0.getOperand(0);
+ } else if (isBitwiseNot(N0.getOperand(1))) {
+ A = N0.getOperand(0);
+ Xor = N0.getOperand(1);
+ }
+
+ if (Xor)
+ return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
+ }
+
+ // Look for:
+ // add (add x, y), 1
+ // And if the target does not like this form then turn into:
+ // sub y, (xor x, -1)
+ if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
+ N0.getOpcode() == ISD::ADD) {
+ SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
+ DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
+ }
+ }
+
+ // (x - y) + -1 -> add (xor y, -1), x
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ isAllOnesOrAllOnesSplat(N1)) {
+ SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
+ return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+ }
+
+ if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
+ return Combined;
+
+ if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
+ return Combined;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ if (SDValue Combined = visitADDLike(N))
+ return Combined;
+
if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
return V;
if (SDValue V = foldAddSubOfSignBit(N, DAG))
return V;
- if (SimplifyDemandedBits(SDValue(N, 0)))
- return SDValue(N, 0);
-
// fold (a+b) -> (a|b) iff a and b share no bits.
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
DAG.haveNoCommonBitsSet(N0, N1))
return DAG.getNode(ISD::OR, DL, VT, N0, N1);
- // fold (add (xor a, -1), 1) -> (sub 0, a)
- if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
- return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
- N0.getOperand(0));
-
- if (SDValue Combined = visitADDLike(N0, N1, N))
- return Combined;
-
- if (SDValue Combined = visitADDLike(N1, N0, N))
- return Combined;
-
return SDValue();
}
@@ -2246,6 +2546,10 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
return SDValue();
+ EVT VT = V.getNode()->getValueType(0);
+ if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
+ return SDValue();
+
// If the result is masked, then no matter what kind of bool it is we can
// return. If it isn't, then we need to make sure the bool type is either 0 or
// 1 and not other values.
@@ -2257,7 +2561,26 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
return SDValue();
}
-SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
+/// Given the operands of an add/sub operation, see if the 2nd operand is a
+/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
+/// the opcode and bypass the mask operation.
+static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
+ SelectionDAG &DAG, const SDLoc &DL) {
+ if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
+ return SDValue();
+
+ EVT VT = N0.getValueType();
+ if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
+ return SDValue();
+
+ // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
+ // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
+ return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
+}
+
+/// Helper for doing combines based on N0 and N1 being added to each other.
+SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
+ SDNode *LocReference) {
EVT VT = N0.getValueType();
SDLoc DL(LocReference);
@@ -2269,21 +2592,42 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference)
N1.getOperand(0).getOperand(1),
N1.getOperand(1)));
- if (N1.getOpcode() == ISD::AND) {
- SDValue AndOp0 = N1.getOperand(0);
- unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
- unsigned DestBits = VT.getScalarSizeInBits();
-
- // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
- // and similar xforms where the inner op is either ~0 or 0.
- if (NumSignBits == DestBits && isOneOrOneSplat(N1->getOperand(1)))
- return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
- }
+ if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
+ return V;
- // add (sext i1), X -> sub X, (zext i1)
+ // Look for:
+ // add (add x, 1), y
+ // And if the target does not like this form then turn into:
+ // sub y, (xor x, -1)
+ if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
+ N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
+ SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
+ DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
+ }
+
+ // Hoist one-use subtraction by non-opaque constant:
+ // (x - C) + y -> (x + y) - C
+ // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
+ }
+ // Hoist one-use subtraction from non-opaque constant:
+ // (C - x) + y -> (y - x) + C
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
+ return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
+ }
+
+ // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
+ // rather than 'add 0/-1' (the zext should get folded).
+ // add (sext i1 Y), X --> sub X, (zext i1 Y)
if (N0.getOpcode() == ISD::SIGN_EXTEND &&
- N0.getOperand(0).getValueType() == MVT::i1 &&
- !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
+ N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
+ TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
}
@@ -2344,8 +2688,10 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
return SDValue();
}
-static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
+static SDValue flipBoolean(SDValue V, const SDLoc &DL,
SelectionDAG &DAG, const TargetLowering &TLI) {
+ EVT VT = V.getValueType();
+
SDValue Cst;
switch (TLI.getBooleanContents(VT)) {
case TargetLowering::ZeroOrOneBooleanContent:
@@ -2353,35 +2699,60 @@ static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
Cst = DAG.getConstant(1, DL, VT);
break;
case TargetLowering::ZeroOrNegativeOneBooleanContent:
- Cst = DAG.getConstant(-1, DL, VT);
+ Cst = DAG.getAllOnesConstant(DL, VT);
break;
}
return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
}
-static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
- if (V.getOpcode() != ISD::XOR) return false;
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1));
- if (!Const) return false;
+/**
+ * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
+ * then the flip also occurs if computing the inverse is the same cost.
+ * This function returns an empty SDValue in case it cannot flip the boolean
+ * without increasing the cost of the computation. If you want to flip a
+ * boolean no matter what, use flipBoolean.
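+ * For example, under ZeroOrOneBooleanContent, (xor x, 1) already computes the
+ * inverse of x, so the flip is free and x itself is returned.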
+ */
+static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool Force) {
+ if (Force && isa<ConstantSDNode>(V))
+ return flipBoolean(V, SDLoc(V), DAG, TLI);
+
+ if (V.getOpcode() != ISD::XOR)
+ return SDValue();
+
+ ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
+ if (!Const)
+ return SDValue();
+ EVT VT = V.getValueType();
+
+ bool IsFlip = false;
switch(TLI.getBooleanContents(VT)) {
case TargetLowering::ZeroOrOneBooleanContent:
- return Const->isOne();
+ IsFlip = Const->isOne();
+ break;
case TargetLowering::ZeroOrNegativeOneBooleanContent:
- return Const->isAllOnesValue();
+ IsFlip = Const->isAllOnesValue();
+ break;
case TargetLowering::UndefinedBooleanContent:
- return (Const->getAPIntValue() & 0x01) == 1;
+ IsFlip = (Const->getAPIntValue() & 0x01) == 1;
+ break;
}
- llvm_unreachable("Unsupported boolean content");
+
+ if (IsFlip)
+ return V.getOperand(0);
+ if (Force)
+ return flipBoolean(V, SDLoc(V), DAG, TLI);
+ return SDValue();
}
-SDValue DAGCombiner::visitUADDO(SDNode *N) {
+SDValue DAGCombiner::visitADDO(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
- if (VT.isVector())
- return SDValue();
+ bool IsSigned = (ISD::SADDO == N->getOpcode());
EVT CarryVT = N->getValueType(1);
SDLoc DL(N);
@@ -2392,40 +2763,42 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) {
DAG.getUNDEF(CarryVT));
// canonicalize constant to RHS.
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (N0C && !N1C)
- return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
- // fold (uaddo x, 0) -> x + no carry out
- if (isNullConstant(N1))
+ // fold (addo x, 0) -> x + no carry out
+ if (isNullOrNullSplat(N1))
return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
- // If it cannot overflow, transform into an add.
- if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
- return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
- DAG.getConstant(0, DL, CarryVT));
+ if (!IsSigned) {
+ // If it cannot overflow, transform into an add.
+ if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getConstant(0, DL, CarryVT));
- // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
- if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
- SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
- DAG.getConstant(0, DL, VT),
- N0.getOperand(0));
- return CombineTo(N, Sub,
- flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
- }
+ // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
+ if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
+ SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
+ DAG.getConstant(0, DL, VT), N0.getOperand(0));
+ return CombineTo(N, Sub,
+ flipBoolean(Sub.getValue(1), DL, DAG, TLI));
+ }
- if (SDValue Combined = visitUADDOLike(N0, N1, N))
- return Combined;
+ if (SDValue Combined = visitUADDOLike(N0, N1, N))
+ return Combined;
- if (SDValue Combined = visitUADDOLike(N1, N0, N))
- return Combined;
+ if (SDValue Combined = visitUADDOLike(N1, N0, N))
+ return Combined;
+ }
return SDValue();
}
SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
- auto VT = N0.getValueType();
+ EVT VT = N0.getValueType();
+ if (VT.isVector())
+ return SDValue();
// (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
// If Y + 1 cannot overflow.
@@ -2484,11 +2857,10 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
}
- EVT CarryVT = CarryIn.getValueType();
-
// fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
if (isNullConstant(N0) && isNullConstant(N1)) {
EVT VT = N0.getValueType();
+ EVT CarryVT = CarryIn.getValueType();
SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
AddToWorklist(CarryExt.getNode());
return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
@@ -2496,16 +2868,6 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
DAG.getConstant(0, DL, CarryVT));
}
- // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
- if (isBitwiseNot(N0) && isNullConstant(N1) &&
- isBooleanFlip(CarryIn, CarryVT, TLI)) {
- SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
- DAG.getConstant(0, DL, N0.getValueType()),
- N0.getOperand(0), CarryIn.getOperand(0));
- return CombineTo(N, Sub,
- flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
- }
-
if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
return Combined;
@@ -2515,12 +2877,112 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
return SDValue();
}
+/**
+ * If we are facing some sort of diamond carry propagation pattern try to
+ * break it up to generate something like:
+ * (addcarry X, 0, (addcarry A, B, Z):Carry)
+ *
+ * The end result is usually an increase in the number of operations required,
+ * but because the carry is now linearized, other transforms can kick in and
+ * optimize the DAG.
+ *
+ * Patterns typically look something like
+ * (uaddo A, B)
+ * / \
+ * Carry Sum
+ * | \
+ * | (addcarry *, 0, Z)
+ * | /
+ * \ Carry
+ * | /
+ * (addcarry X, *, *)
+ *
+ * But numerous variations exist. Our goal is to identify A, B, X and Z and
+ * produce a combine with a single path for carry propagation.
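+ * (Diamonds of this shape commonly arise when wide integer additions are
+ * legalized into uaddo/addcarry chains.)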
+ */
+static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
+ SDValue X, SDValue Carry0, SDValue Carry1,
+ SDNode *N) {
+ if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
+ return SDValue();
+ if (Carry1.getOpcode() != ISD::UADDO)
+ return SDValue();
+
+ SDValue Z;
+
+ /**
+ * First look for a suitable Z. It will present itself in the form of
+ * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
+ */
+ if (Carry0.getOpcode() == ISD::ADDCARRY &&
+ isNullConstant(Carry0.getOperand(1))) {
+ Z = Carry0.getOperand(2);
+ } else if (Carry0.getOpcode() == ISD::UADDO &&
+ isOneConstant(Carry0.getOperand(1))) {
+ EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
+ Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
+ } else {
+ // We couldn't find a suitable Z.
+ return SDValue();
+ }
+
+ auto cancelDiamond = [&](SDValue A, SDValue B) {
+ SDLoc DL(N);
+ SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
+ Combiner.AddToWorklist(NewY.getNode());
+ return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
+ DAG.getConstant(0, DL, X.getValueType()),
+ NewY.getValue(1));
+ };
+
+ /**
+ * (uaddo A, B)
+ * |
+ * Sum
+ * |
+ * (addcarry *, 0, Z)
+ */
+ if (Carry0.getOperand(0) == Carry1.getValue(0)) {
+ return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
+ }
+
+ /**
+ * (addcarry A, 0, Z)
+ * |
+ * Sum
+ * |
+ * (uaddo *, B)
+ */
+ if (Carry1.getOperand(0) == Carry0.getValue(0)) {
+ return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
+ }
+
+ if (Carry1.getOperand(1) == Carry0.getValue(0)) {
+ return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
SDNode *N) {
+ // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
+ if (isBitwiseNot(N0))
+ if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
+ SDLoc DL(N);
+ SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
+ N0.getOperand(0), NotC);
+ return CombineTo(N, Sub,
+ flipBoolean(Sub.getValue(1), DL, DAG, TLI));
+ }
+
// Iff the flag result is dead:
// (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
+ // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
+ // or the dependency between the instructions.
if ((N0.getOpcode() == ISD::ADD ||
- (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
+ (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
+ N0.getValue(1) != CarryIn)) &&
isNullConstant(N1) && !N->hasAnyUseOfValue(1))
return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
N0.getOperand(0), N0.getOperand(1), CarryIn);
@@ -2529,35 +2991,13 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
* When one of the addcarry arguments is itself a carry, we may be facing
* a diamond carry propagation, in which case we try to transform the DAG
* to ensure linear carry propagation if that is possible.
- *
- * We are trying to get:
- * (addcarry X, 0, (addcarry A, B, Z):Carry)
*/
if (auto Y = getAsCarry(TLI, N1)) {
- /**
- * (uaddo A, B)
- * / \
- * Carry Sum
- * | \
- * | (addcarry *, 0, Z)
- * | /
- * \ Carry
- * | /
- * (addcarry X, *, *)
- */
- if (Y.getOpcode() == ISD::UADDO &&
- CarryIn.getResNo() == 1 &&
- CarryIn.getOpcode() == ISD::ADDCARRY &&
- isNullConstant(CarryIn.getOperand(1)) &&
- CarryIn.getOperand(0) == Y.getValue(0)) {
- auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
- Y.getOperand(0), Y.getOperand(1),
- CarryIn.getOperand(2));
- AddToWorklist(NewY.getNode());
- return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
- DAG.getConstant(0, SDLoc(N), N0.getValueType()),
- NewY.getValue(1));
- }
+ // Because both are carries, Y and Z can be swapped.
+ if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
+ return R;
+ if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
+ return R;
}
return SDValue();
@@ -2620,7 +3060,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// -(X >>s 31) -> (X >>u 31)
if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
- if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
+ if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
@@ -2662,16 +3102,48 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
return N0.getOperand(0);
+ // fold (A+C1)-C2 -> A+(C1-C2)
+ if (N0.getOpcode() == ISD::ADD &&
+ isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
+ SDValue NewC = DAG.FoldConstantArithmetic(
+ ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
+ assert(NewC && "Constant folding failed");
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
+ }
+
// fold C2-(A+C1) -> (C2-C1)-A
if (N1.getOpcode() == ISD::ADD) {
SDValue N11 = N1.getOperand(1);
if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
- SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
+ SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
+ N11.getNode());
+ assert(NewC && "Constant folding failed");
return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
}
}
+ // fold (A-C1)-C2 -> A-(C1+C2)
+ if (N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
+ SDValue NewC = DAG.FoldConstantArithmetic(
+ ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
+ assert(NewC && "Constant folding failed");
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
+ }
+
+ // fold (c1-A)-c2 -> (c1-c2)-A
+ if (N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
+ SDValue NewC = DAG.FoldConstantArithmetic(
+ ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode());
+ assert(NewC && "Constant folding failed");
+ return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
+ }
+
// fold ((A+(B+or-C))-B) -> A+or-C
if (N0.getOpcode() == ISD::ADD &&
(N0.getOperand(1).getOpcode() == ISD::SUB ||
@@ -2728,6 +3200,63 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (SDValue V = foldAddSubOfSignBit(N, DAG))
return V;
+ if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
+ return V;
+
+ // (x - y) - 1 -> add (xor y, -1), x
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
+ SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
+ DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+ }
+
+ // Look for:
+ // sub y, (xor x, -1)
+ // And if the target does not like this form then turn into:
+ // add (add x, y), 1
+ if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
+ return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
+ }
+
+ // Hoist one-use addition by non-opaque constant:
+ // (x + C) - y -> (x - y) + C
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
+ isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
+ }
+ // y - (x + C) -> (y - x) - C
+ if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
+ isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
+ }
+ // (x - C) - y -> (x - y) - C
+ // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
+ }
+ // (C - x) - y -> C - (x + y)
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
+ }
+
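// Illustrative sketch (not part of the patch): the four constant hoists above
// are all reassociations that hold modulo 2^n; two spot checks:
static_assert((10u + 3u) - 4u == (10u - 4u) + 3u,
              "(x + C) - y == (x - y) + C");
static_assert((3u - 10u) - 4u == 3u - (10u + 4u),
              "(C - x) - y == C - (x + y)");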
+ // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
+ // rather than 'sub 0/1' (the sext should get folded).
+ // sub X, (zext i1 Y) --> add X, (sext i1 Y)
+ if (N1.getOpcode() == ISD::ZERO_EXTEND &&
+ N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
+ TLI.getBooleanContents(VT) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
+ }
+
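// Illustrative sketch (not part of the patch): for a boolean b, zext(b) is
// {0,1} while sext(b) is {0,-1}, so sub-of-zext equals add-of-sext:
#include <cstdint>
constexpr bool SubZextIsAddSext(uint32_t X, bool B) {
  return X - uint32_t(B) == X + uint32_t(-int32_t(B));
}
static_assert(SubZextIsAddSext(10, false) && SubZextIsAddSext(10, true),
              "sub X, (zext i1 Y) == add X, (sext i1 Y)");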
// fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
@@ -2772,7 +3301,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
SDValue ShAmt = N1.getOperand(1);
ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
- if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
+ if (ShAmtC &&
+ ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
}
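// Illustrative sketch (not part of the patch): (y >>u 31) is {0,1} while
// (y >>s 31) is {0,-1}, so the sub becomes an add. Host-side check, assuming
// the usual arithmetic right shift for signed values (guaranteed in C++20):
#include <cstdint>
constexpr bool SubSrlIsAddSra(uint32_t X, int32_t Y) {
  return X - (uint32_t(Y) >> 31) == X + uint32_t(Y >> 31);
}
static_assert(SubSrlIsAddSra(100, -7) && SubSrlIsAddSra(100, 7),
              "sub X, (srl Y, 31) == add X, (sra Y, 31)");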
@@ -2846,12 +3376,11 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitUSUBO(SDNode *N) {
+SDValue DAGCombiner::visitSUBO(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
- if (VT.isVector())
- return SDValue();
+ bool IsSigned = (ISD::SSUBO == N->getOpcode());
EVT CarryVT = N->getValueType(1);
SDLoc DL(N);
@@ -2861,17 +3390,25 @@ SDValue DAGCombiner::visitUSUBO(SDNode *N) {
return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
DAG.getUNDEF(CarryVT));
- // fold (usubo x, x) -> 0 + no borrow
+ // fold (subo x, x) -> 0 + no borrow
if (N0 == N1)
return CombineTo(N, DAG.getConstant(0, DL, VT),
DAG.getConstant(0, DL, CarryVT));
- // fold (usubo x, 0) -> x + no borrow
- if (isNullConstant(N1))
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+
+  // fold (subo x, c) -> (addo x, -c)
+ if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
+ return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
+ DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
+ }
+
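// Illustrative sketch (not part of the patch): why the isMinSignedValue()
// guard matters. For any C != INT32_MIN the negation is representable, and
// (ssubo x, C) agrees with (saddo x, -C) on both the result and the overflow
// bit. Host-side check using the GCC/Clang overflow builtins:
#include <cstdint>
bool SubOMatchesAddO(int32_t X, int32_t C) { // precondition: C != INT32_MIN
  int32_t RSub, RAdd;
  bool OvfSub = __builtin_sub_overflow(X, C, &RSub);
  bool OvfAdd = __builtin_add_overflow(X, -C, &RAdd);
  return OvfSub == OvfAdd && RSub == RAdd;
}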
+ // fold (subo x, 0) -> x + no borrow
+ if (isNullOrNullSplat(N1))
return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
// Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
- if (isAllOnesConstant(N0))
+ if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
DAG.getConstant(0, DL, CarryVT));
@@ -3012,13 +3549,13 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
MathOp = ISD::SUB;
if (MathOp != ISD::DELETED_NODE) {
- unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
- : (MulC + 1).logBase2();
- assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
- "Not expecting multiply-by-constant that could have simplified");
+ unsigned ShAmt =
+ MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
+ assert(ShAmt < VT.getScalarSizeInBits() &&
+ "multiply-by-constant generated out of bounds shift");
SDLoc DL(N);
- SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
- DAG.getConstant(ShAmt, DL, VT));
+ SDValue Shl =
+ DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
if (ConstValue1.isNegative())
R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
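// Illustrative sketch (not part of the patch): the shift-and-add/sub
// expansion above relies on x * (2^k + 1) == (x << k) + x and
// x * (2^k - 1) == (x << k) - x; spot checks with k = 3:
static_assert(5u * 9u == (5u << 3) + 5u, "mul by 2^k+1 -> shl+add");
static_assert(5u * 7u == (5u << 3) - 5u, "mul by 2^k-1 -> shl+sub");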
@@ -3069,7 +3606,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
N0.getOperand(1), N1));
// reassociate mul
- if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
+ if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
return RMUL;
return SDValue();
@@ -3612,7 +4149,6 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
// fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
- SDLoc DL(N);
unsigned NumEltBits = VT.getScalarSizeInBits();
SDValue LogBase2 = BuildLogBase2(N1, DL);
SDValue SRLAmt = DAG.getNode(
@@ -3753,22 +4289,14 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitSMULO(SDNode *N) {
- // (smulo x, 2) -> (saddo x, x)
- if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
- if (C2->getAPIntValue() == 2)
- return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
- N->getOperand(0), N->getOperand(0));
-
- return SDValue();
-}
+SDValue DAGCombiner::visitMULO(SDNode *N) {
+ bool IsSigned = (ISD::SMULO == N->getOpcode());
-SDValue DAGCombiner::visitUMULO(SDNode *N) {
- // (umulo x, 2) -> (uaddo x, x)
- if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ // (mulo x, 2) -> (addo x, x)
+ if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1)))
if (C2->getAPIntValue() == 2)
- return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
- N->getOperand(0), N->getOperand(0));
+ return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N),
+ N->getVTList(), N->getOperand(0), N->getOperand(0));
return SDValue();
}
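// Illustrative sketch (not part of the patch): x * 2 and x + x agree on both
// the value and the overflow bit, signed or unsigned. Host-side check of the
// signed case via the GCC/Clang overflow builtins:
#include <cstdint>
bool MulO2MatchesAddO(int32_t X) {
  int32_t RMul, RAdd;
  bool OvfMul = __builtin_mul_overflow(X, 2, &RMul);
  bool OvfAdd = __builtin_add_overflow(X, X, &RAdd);
  return OvfMul == OvfAdd && RMul == RAdd;
}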
@@ -4075,6 +4603,33 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
SDValue Zero = DAG.getConstant(0, DL, OpVT);
return DAG.getSetCC(DL, VT, Or, Zero, CC1);
}
+
+ // Turn compare of constants whose difference is 1 bit into add+and+setcc.
+ // TODO - support non-uniform vector amounts.
+ if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
+ // Match a shared variable operand and 2 non-opaque constant operands.
+ ConstantSDNode *C0 = isConstOrConstSplat(LR);
+ ConstantSDNode *C1 = isConstOrConstSplat(RR);
+ if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
+ // Canonicalize larger constant as C0.
+ if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
+ std::swap(C0, C1);
+
+ // The difference of the constants must be a single bit.
+ const APInt &C0Val = C0->getAPIntValue();
+ const APInt &C1Val = C1->getAPIntValue();
+ if ((C0Val - C1Val).isPowerOf2()) {
+ // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
+ // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
+ SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
+ SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
+ SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
+ SDValue Zero = DAG.getConstant(0, DL, OpVT);
+ return DAG.getSetCC(DL, VT, And, Zero, CC0);
+ }
+ }
+ }
}
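// Illustrative sketch (not part of the patch): with C0 = 5 and C1 = 1 the
// difference is 4, a power of 2, so (X != 5 && X != 1) must equal
// (((X + -1) & ~4u) != 0). Host-side check over a small range (the identity
// holds for all X):
#include <cassert>
#include <cstdint>
void CheckTwoConstantSetCCFold() {
  for (uint32_t X = 0; X < 256; ++X)
    assert((X != 5u && X != 1u) == (((X - 1u) & ~4u) != 0u));
}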
// Canonicalize equivalent operands to LL == RL.
@@ -4259,7 +4814,8 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
// Ensure that this isn't going to produce an unsupported unaligned access.
if (ShAmt &&
!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
- LDST->getAddressSpace(), ShAmt / 8))
+ LDST->getAddressSpace(), ShAmt / 8,
+ LDST->getMemOperand()->getFlags()))
return false;
// It's not possible to generate a constant of extended or untyped type.
@@ -4316,9 +4872,7 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
SDNode *&NodeToMask) {
// Recursively search for the operands, looking for loads which can be
// narrowed.
- for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
- SDValue Op = N->getOperand(i);
-
+ for (SDValue Op : N->op_values()) {
if (Op.getValueType().isVector())
return false;
@@ -4480,7 +5034,7 @@ SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
SDValue N1 = N->getOperand(1);
// Do we actually prefer shifts over mask?
- if (!TLI.preferShiftsToClearExtremeBits(N0))
+ if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
return SDValue();
// Try to match (-1 '[outer] logical shift' y)
@@ -4575,7 +5129,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return NewSel;
// reassociate and
- if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
+ if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
return RAND;
// Try to convert a constant mask AND into a shuffle clear mask.
@@ -4644,24 +5198,22 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// the first vector value and FF for the rest, repeating. We need a mask
// that will apply equally to all members of the vector, so AND all the
// lanes of the constant together.
- EVT VT = Vector->getValueType(0);
- unsigned BitWidth = VT.getScalarSizeInBits();
+ unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
// If the splat value has been compressed to a bitlength lower
// than the size of the vector lane, we need to re-expand it to
// the lane size.
- if (BitWidth > SplatBitSize)
- for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
- SplatBitSize < BitWidth;
- SplatBitSize = SplatBitSize * 2)
+ if (EltBitWidth > SplatBitSize)
+ for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
+ SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
SplatValue |= SplatValue.shl(SplatBitSize);
// Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
// multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
- if (SplatBitSize % BitWidth == 0) {
- Constant = APInt::getAllOnesValue(BitWidth);
- for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
- Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
+ if ((SplatBitSize % EltBitWidth) == 0) {
+ Constant = APInt::getAllOnesValue(EltBitWidth);
+ for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
+ Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
}
}
}
@@ -4773,44 +5325,29 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return SDValue(N, 0);
// fold (zext_inreg (extload x)) -> (zextload x)
- if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- EVT MemVT = LN0->getMemoryVT();
- // If we zero all the possible extended bits, then we can turn this into
- // a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getScalarValueSizeInBits();
- if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getScalarSizeInBits())) &&
- ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
- LN0->getChain(), LN0->getBasePtr(),
- MemVT, LN0->getMemOperand());
- AddToWorklist(N);
- CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
// fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
- if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
- N0.hasOneUse()) {
+ if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ (ISD::isEXTLoad(N0.getNode()) ||
+ (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
// If we zero all the possible extended bits, then we can turn this into
// a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getScalarValueSizeInBits();
- if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getScalarSizeInBits())) &&
+ unsigned ExtBitSize = N1.getScalarValueSizeInBits();
+ unsigned MemBitSize = MemVT.getScalarSizeInBits();
+ APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
+ if (DAG.MaskedValueIsZero(N1, ExtBits) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
- LN0->getChain(), LN0->getBasePtr(),
- MemVT, LN0->getMemOperand());
+ SDValue ExtLoad =
+ DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
+ LN0->getBasePtr(), MemVT, LN0->getMemOperand());
AddToWorklist(N);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
+
// fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
@@ -5155,6 +5692,23 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
return SDValue();
}
+/// OR combines for which the commuted variant will be tried as well.
+static SDValue visitORCommutative(
+ SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
+ EVT VT = N0.getValueType();
+ if (N0.getOpcode() == ISD::AND) {
+ // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
+ if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
+
+ // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
+ if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5284,7 +5838,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return BSwap;
// reassociate or
- if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
+ if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
return ROR;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
@@ -5302,6 +5856,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
}
+ if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
+ return Combined;
+ if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
+ return Combined;
+
// Simplify: (or (op x...), (op y...)) -> (op (or x, y))
if (N0.getOpcode() == N1.getOpcode())
if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
@@ -5318,6 +5877,12 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
+ // If OR can be rewritten into ADD, try combines based on ADD.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
+ DAG.haveNoCommonBitsSet(N0, N1))
+ if (SDValue Combined = visitADDLike(N))
+ return Combined;
+
return SDValue();
}
@@ -5869,6 +6434,213 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
return None;
}
+static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
+ return i;
+}
+
+static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
+ return BW - i - 1;
+}
+
+// Check if the byte offsets we are looking at match either a big or little
+// endian value load. Return true for big endian, false for little endian,
+// and None if the match failed.
+static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets,
+ int64_t FirstOffset) {
+ // The endian can be decided only when it is 2 bytes at least.
+ unsigned Width = ByteOffsets.size();
+ if (Width < 2)
+ return None;
+
+ bool BigEndian = true, LittleEndian = true;
+ for (unsigned i = 0; i < Width; i++) {
+ int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
+ LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
+ BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
+ if (!BigEndian && !LittleEndian)
+ return None;
+ }
+
+  assert((BigEndian != LittleEndian) && "It should be either big endian or "
+                                        "little endian");
+ return BigEndian;
+}
+
+static SDValue stripTruncAndExt(SDValue Value) {
+ switch (Value.getOpcode()) {
+ case ISD::TRUNCATE:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ANY_EXTEND:
+ return stripTruncAndExt(Value.getOperand(0));
+ }
+ return Value;
+}
+
+/// Match a pattern where a wide type scalar value is stored by several narrow
+/// stores. Fold it into a single store or a BSWAP and a store if the target
+/// supports it.
+///
+/// Assuming little endian target:
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 0) & 0xFF;
+/// p[1] = (val >> 8) & 0xFF;
+/// p[2] = (val >> 16) & 0xFF;
+/// p[3] = (val >> 24) & 0xFF;
+/// =>
+/// *((i32)p) = val;
+///
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 24) & 0xFF;
+/// p[1] = (val >> 16) & 0xFF;
+/// p[2] = (val >> 8) & 0xFF;
+/// p[3] = (val >> 0) & 0xFF;
+/// =>
+/// *((i32)p) = BSWAP(val);
+SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
+ // Collect all the stores in the chain.
+ SDValue Chain;
+ SmallVector<StoreSDNode *, 8> Stores;
+ for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
+ if (Store->getMemoryVT() != MVT::i8 ||
+ Store->isVolatile() || Store->isIndexed())
+ return SDValue();
+ Stores.push_back(Store);
+ Chain = Store->getChain();
+ }
+ // Handle the simple type only.
+ unsigned Width = Stores.size();
+ EVT VT = EVT::getIntegerVT(
+ *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
+ return SDValue();
+
+ // Check if all the bytes of the combined value we are looking at are stored
+ // to the same base address. Collect bytes offsets from Base address into
+ // ByteOffsets.
+ SDValue CombinedValue;
+ SmallVector<int64_t, 4> ByteOffsets(Width, INT64_MAX);
+ int64_t FirstOffset = INT64_MAX;
+ StoreSDNode *FirstStore = nullptr;
+ Optional<BaseIndexOffset> Base;
+ for (auto Store : Stores) {
+    // All the stores store a different byte of the CombinedValue. A truncate
+    // is required to get that byte's value.
+ SDValue Trunc = Store->getValue();
+ if (Trunc.getOpcode() != ISD::TRUNCATE)
+ return SDValue();
+    // A shift operation is required to get the right byte offset, except for
+    // the first byte.
+ int64_t Offset = 0;
+ SDValue Value = Trunc.getOperand(0);
+ if (Value.getOpcode() == ISD::SRL ||
+ Value.getOpcode() == ISD::SRA) {
+ ConstantSDNode *ShiftOffset =
+ dyn_cast<ConstantSDNode>(Value.getOperand(1));
+ // Trying to match the following pattern. The shift offset must be
+ // a constant and a multiple of 8. It is the byte offset in "y".
+ //
+ // x = srl y, offset
+ // i8 z = trunc x
+ // store z, ...
+ if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
+ return SDValue();
+
+ Offset = ShiftOffset->getSExtValue()/8;
+ Value = Value.getOperand(0);
+ }
+
+ // Stores must share the same combined value with different offsets.
+ if (!CombinedValue)
+ CombinedValue = Value;
+ else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
+ return SDValue();
+
+    // The trunc and all the extend operations should be stripped to get the
+    // real value being stored.
+ else if (CombinedValue.getValueType() != VT) {
+ if (Value.getValueType() == VT ||
+ Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
+ CombinedValue = Value;
+ // Give up if the combined value type is smaller than the store size.
+ if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
+ return SDValue();
+ }
+
+ // Stores must share the same base address
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
+ int64_t ByteOffsetFromBase = 0;
+ if (!Base)
+ Base = Ptr;
+ else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
+ return SDValue();
+
+ // Remember the first byte store
+ if (ByteOffsetFromBase < FirstOffset) {
+ FirstStore = Store;
+ FirstOffset = ByteOffsetFromBase;
+ }
+ // Map the offset in the store and the offset in the combined value, and
+    // return early if it has been set before.
+ if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
+ return SDValue();
+ ByteOffsets[Offset] = ByteOffsetFromBase;
+ }
+
+ assert(FirstOffset != INT64_MAX && "First byte offset must be set");
+ assert(FirstStore && "First store must be set");
+
+ // Check if the bytes of the combined value we are looking at match with
+ // either big or little endian value store.
+ Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
+ if (!IsBigEndian.hasValue())
+ return SDValue();
+
+ // The node we are looking at matches with the pattern, check if we can
+ // replace it with a single bswap if needed and store.
+
+ // If the store needs byte swap check if the target supports it
+ bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
+
+ // Before legalize we can introduce illegal bswaps which will be later
+ // converted to an explicit bswap sequence. This way we end up with a single
+ // store and byte shuffling instead of several stores and byte shuffling.
+ if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Check that a store of the wide type is both allowed and fast on the target
+ bool Fast = false;
+ bool Allowed =
+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ *FirstStore->getMemOperand(), &Fast);
+ if (!Allowed || !Fast)
+ return SDValue();
+
+ if (VT != CombinedValue.getValueType()) {
+ assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
+ "Get unexpected store value to combine");
+ CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
+ CombinedValue);
+ }
+
+ if (NeedsBswap)
+ CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
+
+ SDValue NewStore =
+ DAG.getStore(Chain, SDLoc(N), CombinedValue, FirstStore->getBasePtr(),
+ FirstStore->getPointerInfo(), FirstStore->getAlignment());
+
+ // Rely on other DAG combine rules to remove the other individual stores.
+ DAG.ReplaceAllUsesWith(N, NewStore.getNode());
+ return NewStore;
+}
+
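// Illustrative sketch (not part of the patch): roughly the little-endian
// source idiom MatchStoreCombine targets. All four i8 stores should now
// collapse into one i32 store of V (plus a BSWAP on big-endian targets):
#include <cstdint>
void StoreLE32(uint8_t *P, uint32_t V) {
  P[0] = uint8_t(V);
  P[1] = uint8_t(V >> 8);
  P[2] = uint8_t(V >> 16);
  P[3] = uint8_t(V >> 24);
}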
/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the targets supports it.
@@ -5916,11 +6688,6 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
return SDValue();
- std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
- unsigned BW, unsigned i) { return i; };
- std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
- unsigned BW, unsigned i) { return BW - i - 1; };
-
bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
auto MemoryByteOffset = [&] (ByteProvider P) {
assert(P.isMemory() && "Must be a memory byte provider");
@@ -5987,15 +6754,10 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// Check if the bytes of the OR we are looking at match with either big or
// little endian value load
- bool BigEndian = true, LittleEndian = true;
- for (unsigned i = 0; i < ByteWidth; i++) {
- int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
- LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
- BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
- if (!BigEndian && !LittleEndian)
- return SDValue();
- }
- assert((BigEndian != LittleEndian) && "should be either or");
+ Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
+ if (!IsBigEndian.hasValue())
+ return SDValue();
+
assert(FirstByteProvider && "must be set");
// Ensure that the first byte is loaded from zero offset of the first load.
@@ -6008,7 +6770,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// replace it with a single load and bswap if needed.
// If the load needs byte swap check if the target supports it
- bool NeedsBswap = IsBigEndianTarget != BigEndian;
+ bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
// Before legalize we can introduce illegal bswaps which will be later
// converted to an explicit bswap sequence. This way we end up with a single
@@ -6019,8 +6781,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// Check that a load of the wide type is both allowed and fast on the target
bool Fast = false;
bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
- VT, FirstLoad->getAddressSpace(),
- FirstLoad->getAlignment(), &Fast);
+ VT, *FirstLoad->getMemOperand(), &Fast);
if (!Allowed || !Fast)
return SDValue();
@@ -6160,7 +6921,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return NewSel;
// reassociate xor
- if (SDValue RXOR = ReassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
+ if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
return RXOR;
// fold !(x cc y) -> (x !cc y)
@@ -6218,6 +6979,16 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
}
}
+
+  // fold (not (neg X)) -> (add X, -1)
+ // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
+ // Y is a constant or the subtract has a single use.
+ if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
+ isNullConstant(N0.getOperand(0))) {
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
+ DAG.getAllOnesConstant(DL, VT));
+ }
+
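// Illustrative sketch (not part of the patch): ~(0 - x) == x - 1 == x + (-1)
// in two's complement; quick unsigned spot check:
static_assert(~(0u - 5u) == 5u + ~0u, "not(neg x) == add(x, -1)");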
// fold (xor (and x, y), y) -> (and (not x), y)
if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
SDValue X = N0.getOperand(0);
@@ -6310,11 +7081,16 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
+/// We are looking for: (shift being one of shl/sra/srl)
+/// shift (binop X, C0), C1
+/// And want to transform into:
+/// binop (shift X, C1), (shift C0, C1)
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
// Do not turn a 'not' into a regular xor.
if (isBitwiseNot(N->getOperand(0)))
return SDValue();
+ // The inner binop must be one-use, since we want to replace it.
SDNode *LHS = N->getOperand(0).getNode();
if (!LHS->hasOneUse()) return SDValue();
@@ -6322,56 +7098,43 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
// instead of (shift (and)), likewise for add, or, xor, etc. This sort of
// thing happens with address calculations, so it's important to canonicalize
// it.
- bool HighBitSet = false; // Can we transform this if the high bit is set?
-
switch (LHS->getOpcode()) {
- default: return SDValue();
+ default:
+ return SDValue();
case ISD::OR:
case ISD::XOR:
- HighBitSet = false; // We can only transform sra if the high bit is clear.
- break;
case ISD::AND:
- HighBitSet = true; // We can only transform sra if the high bit is set.
break;
case ISD::ADD:
if (N->getOpcode() != ISD::SHL)
return SDValue(); // only shl(add) not sr[al](add).
- HighBitSet = false; // We can only transform sra if the high bit is clear.
break;
}
// We require the RHS of the binop to be a constant and not opaque as well.
ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
- if (!BinOpCst) return SDValue();
+ if (!BinOpCst)
+ return SDValue();
// FIXME: disable this unless the input to the binop is a shift by a constant
- // or is copy/select.Enable this in other cases when figure out it's exactly profitable.
- SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
- bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
- BinOpLHSVal->getOpcode() == ISD::SRA ||
- BinOpLHSVal->getOpcode() == ISD::SRL;
- bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
- BinOpLHSVal->getOpcode() == ISD::SELECT;
-
- if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
- !isCopyOrSelect)
+  // or is copy/select. Enable this in other cases when we figure out it's
+  // exactly profitable.
+ SDValue BinOpLHSVal = LHS->getOperand(0);
+ bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
+ BinOpLHSVal.getOpcode() == ISD::SRA ||
+ BinOpLHSVal.getOpcode() == ISD::SRL) &&
+ isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
+ bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
+ BinOpLHSVal.getOpcode() == ISD::SELECT;
+
+ if (!IsShiftByConstant && !IsCopyOrSelect)
return SDValue();
- if (isCopyOrSelect && N->hasOneUse())
+ if (IsCopyOrSelect && N->hasOneUse())
return SDValue();
EVT VT = N->getValueType(0);
- // If this is a signed shift right, and the high bit is modified by the
- // logical operation, do not perform the transformation. The highBitSet
- // boolean indicates the value of the high bit of the constant which would
- // cause it to be modified for this operation.
- if (N->getOpcode() == ISD::SRA) {
- bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
- if (BinOpRHSSignSet != HighBitSet)
- return SDValue();
- }
-
if (!TLI.isDesirableToCommuteWithShift(N, Level))
return SDValue();
@@ -6395,11 +7158,12 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
assert(N->getOperand(0).getOpcode() == ISD::AND);
// (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
- if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
+ EVT TruncVT = N->getValueType(0);
+ if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
+ TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
SDValue N01 = N->getOperand(0).getOperand(1);
if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
SDLoc DL(N);
- EVT TruncVT = N->getValueType(0);
SDValue N00 = N->getOperand(0).getOperand(0);
SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
@@ -6431,6 +7195,7 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
}
// fold (rot x, c) -> (rot x, c % BitSize)
+ // TODO - support non-uniform vector amounts.
if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
if (Cst->getAPIntValue().uge(Bitsize)) {
uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
@@ -6476,6 +7241,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return V;
EVT VT = N0.getValueType();
+ EVT ShiftVT = N1.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
@@ -6506,6 +7272,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (shl c1, c2) -> c1<<c2
+ // TODO - support non-uniform vector shift amounts.
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
@@ -6517,6 +7284,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
return DAG.getConstant(0, SDLoc(N), VT);
+
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
@@ -6524,6 +7292,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
}
+ // TODO - support non-uniform vector shift amounts.
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -6548,69 +7317,86 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
SDLoc DL(N);
- EVT ShiftVT = N1.getValueType();
SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
}
}
- // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
+ // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
// For this to be valid, the second form must not preserve any of the bits
// that are shifted out by the inner shift in the first form. This means
// the outer shift size must be >= the number of bits added by the ext.
// As a corollary, we don't care what kind of ext it is.
- if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
- N0.getOpcode() == ISD::ANY_EXTEND ||
- N0.getOpcode() == ISD::SIGN_EXTEND) &&
+ if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND) &&
N0.getOperand(0).getOpcode() == ISD::SHL) {
SDValue N0Op0 = N0.getOperand(0);
- if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
- APInt c1 = N0Op0C1->getAPIntValue();
- APInt c2 = N1C->getAPIntValue();
- zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ SDValue InnerShiftAmt = N0Op0.getOperand(1);
+ EVT InnerVT = N0Op0.getValueType();
+ uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
- EVT InnerShiftVT = N0Op0.getValueType();
- uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
- if (c2.uge(OpSizeInBits - InnerShiftSize)) {
- SDLoc DL(N0);
- APInt Sum = c1 + c2;
- if (Sum.uge(OpSizeInBits))
- return DAG.getConstant(0, DL, VT);
+ auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return c2.uge(OpSizeInBits - InnerBitwidth) &&
+ (c1 + c2).uge(OpSizeInBits);
+ };
+ if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true))
+ return DAG.getConstant(0, SDLoc(N), VT);
- return DAG.getNode(
- ISD::SHL, DL, VT,
- DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
- DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
- }
+ auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return c2.uge(OpSizeInBits - InnerBitwidth) &&
+ (c1 + c2).ult(OpSizeInBits);
+ };
+ if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDLoc DL(N);
+ SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
+ SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
+ Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
+ return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
}
}
// fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
// Only fold this if the inner zext has no other uses to avoid increasing
// the total number of instructions.
- if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
+ if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
N0.getOperand(0).getOpcode() == ISD::SRL) {
SDValue N0Op0 = N0.getOperand(0);
- if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
- if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
- uint64_t c1 = N0Op0C1->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
- if (c1 == c2) {
- SDValue NewOp0 = N0.getOperand(0);
- EVT CountVT = NewOp0.getOperand(1).getValueType();
- SDLoc DL(N);
- SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
- NewOp0,
- DAG.getConstant(c2, DL, CountVT));
- AddToWorklist(NewSHL.getNode());
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
- }
- }
+ SDValue InnerShiftAmt = N0Op0.getOperand(1);
+
+ auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2);
+ return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
+ };
+ if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDLoc DL(N);
+ EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
+ SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
+ NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
+ AddToWorklist(NewSHL.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
}
}
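// Illustrative sketch (not part of the patch): with equal shift amounts the
// zext can be hoisted because the srl already cleared the bits the shl would
// push out; checked at i8 -> i32 with shift amount C:
#include <cstdint>
constexpr bool ShlOfZextOfSrl(uint8_t X, unsigned C) { // C in [0,7]
  return (uint32_t(uint8_t(X >> C)) << C) ==
         uint32_t(uint8_t(uint8_t(X >> C) << C));
}
static_assert(ShlOfZextOfSrl(0xAB, 3) && ShlOfZextOfSrl(0xFF, 7),
              "shl (zext (srl x, C)), C == zext (shl (srl x, C), C)");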
// fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
// fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
+ // TODO - support non-uniform vector shift amounts.
if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
N0->getFlags().hasExact()) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
@@ -6619,9 +7405,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
SDLoc DL(N);
if (C1 <= C2)
return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
- DAG.getConstant(C2 - C1, DL, N1.getValueType()));
+ DAG.getConstant(C2 - C1, DL, ShiftVT));
return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
- DAG.getConstant(C1 - C2, DL, N1.getValueType()));
+ DAG.getConstant(C1 - C2, DL, ShiftVT));
}
}
@@ -6629,11 +7415,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// (and (srl x, (sub c1, c2), MASK)
// Only fold this if the inner shift has no other uses -- if it does, folding
// this will increase the total number of instructions.
+ // TODO - drop hasOneUse requirement if c1 == c2?
+ // TODO - support non-uniform vector shift amounts.
if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
- TLI.shouldFoldShiftPairToMask(N, Level)) {
+ TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
- uint64_t c1 = N0C1->getZExtValue();
- if (c1 < OpSizeInBits) {
+ if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
+ uint64_t c1 = N0C1->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
SDValue Shift;
@@ -6641,12 +7429,12 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
Mask <<= c2 - c1;
SDLoc DL(N);
Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
- DAG.getConstant(c2 - c1, DL, N1.getValueType()));
+ DAG.getConstant(c2 - c1, DL, ShiftVT));
} else {
Mask.lshrInPlace(c1 - c2);
SDLoc DL(N);
Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
- DAG.getConstant(c1 - c2, DL, N1.getValueType()));
+ DAG.getConstant(c1 - c2, DL, ShiftVT));
}
SDLoc DL(N0);
return DAG.getNode(ISD::AND, DL, VT, Shift,
@@ -6719,6 +7507,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (sra c1, c2) -> (sra c1, c2)
+ // TODO - support non-uniform vector shift amounts.
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
@@ -6815,32 +7604,32 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
}
+ // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
// fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
// if c1 is equal to the number of bits the trunc removes
+ // TODO - support non-uniform vector shift amounts.
if (N0.getOpcode() == ISD::TRUNCATE &&
(N0.getOperand(0).getOpcode() == ISD::SRL ||
N0.getOperand(0).getOpcode() == ISD::SRA) &&
N0.getOperand(0).hasOneUse() &&
- N0.getOperand(0).getOperand(1).hasOneUse() &&
- N1C) {
+ N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
SDValue N0Op0 = N0.getOperand(0);
if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
- unsigned LargeShiftVal = LargeShift->getZExtValue();
EVT LargeVT = N0Op0.getValueType();
-
- if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
+ unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
+ if (LargeShift->getAPIntValue() == TruncBits) {
SDLoc DL(N);
- SDValue Amt =
- DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
- getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
- SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
- N0Op0.getOperand(0), Amt);
+ SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
+ getShiftAmountTy(LargeVT));
+ SDValue SRA =
+ DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
}
}
}
// Simplify, based on bits shifted out of the LHS.
+ // TODO - support non-uniform vector shift amounts.
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -6872,6 +7661,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (srl c1, c2) -> c1 >>u c2
+ // TODO - support non-uniform vector shift amounts.
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
@@ -6912,6 +7702,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
// fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
+ // TODO - support non-uniform vector shift amounts.
if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
N0.getOperand(0).getOpcode() == ISD::SRL) {
if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
@@ -6935,6 +7726,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
// fold (srl (shl x, c), c) -> (and x, cst2)
+ // TODO - (srl (shl x, c1), c2).
if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
SDLoc DL(N);
@@ -6945,11 +7737,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
// fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
+ // TODO - support non-uniform vector shift amounts.
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
// Shifting in all undef bits?
EVT SmallVT = N0.getOperand(0).getValueType();
unsigned BitSize = SmallVT.getScalarSizeInBits();
- if (N1C->getZExtValue() >= BitSize)
+ if (N1C->getAPIntValue().uge(BitSize))
return DAG.getUNDEF(VT);
if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
@@ -6970,7 +7763,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
// bit, which is unmodified by sra.
- if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
+ if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
if (N0.getOpcode() == ISD::SRA)
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
}
@@ -7021,6 +7814,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold operands of srl based on knowledge that the low bits are not
// demanded.
+ // TODO - support non-uniform vector shift amounts.
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -7079,13 +7873,49 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
return IsFSHL ? N0 : N1;
- // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
+ auto IsUndefOrZero = [](SDValue V) {
+ return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
+ };
+
+ // TODO - support non-uniform vector shift amounts.
if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
+ EVT ShAmtTy = N2.getValueType();
+
+ // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
if (Cst->getAPIntValue().uge(BitWidth)) {
uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
- DAG.getConstant(RotAmt, SDLoc(N), N2.getValueType()));
+ DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
}
+
+ unsigned ShAmt = Cst->getZExtValue();
+ if (ShAmt == 0)
+ return IsFSHL ? N0 : N1;
+
+ // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
+ // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
+ // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
+ // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
+ if (IsUndefOrZero(N0))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
+ DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
+ SDLoc(N), ShAmtTy));
+ if (IsUndefOrZero(N1))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
+ DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
+ SDLoc(N), ShAmtTy));
+ }
+
+ // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
+ // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
+  // iff we know the shift amount is in range.
+ // TODO: when is it worth doing SUB(BW, N2) as well?
+ if (isPowerOf2_32(BitWidth)) {
+ APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
+ if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
+ if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
}
// fold (fshl N0, N0, N2) -> (rotl N0, N2)
@@ -7096,6 +7926,10 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
if (N0 == N1 && hasOperation(RotOpc, VT))
return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
+ // Simplify, based on bits shifted out of N0/N1.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -7207,11 +8041,14 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
// FIXME: This should be checking for no signed zeros on individual operands, as
// well as no nans.
-static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS) {
+static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
+ SDValue RHS,
+ const TargetLowering &TLI) {
const TargetOptions &Options = DAG.getTarget().Options;
EVT VT = LHS.getValueType();
return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
+ TLI.isProfitableToCombineMinNumMaxNum(VT) &&
DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
}
@@ -7364,6 +8201,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
EVT VT = N->getValueType(0);
EVT VT0 = N0.getValueType();
SDLoc DL(N);
+ SDNodeFlags Flags = N->getFlags();
if (SDValue V = DAG.simplifySelect(N0, N1, N2))
return V;
@@ -7414,20 +8252,26 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue Cond0 = N0->getOperand(0);
SDValue Cond1 = N0->getOperand(1);
SDValue InnerSelect =
- DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
+ DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
if (normalizeToSequence || !InnerSelect.use_empty())
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
- InnerSelect, N2);
+ InnerSelect, N2, Flags);
+ // Cleanup on failure.
+ if (InnerSelect.use_empty())
+ recursivelyDeleteUnusedNodes(InnerSelect.getNode());
}
// select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
SDValue Cond0 = N0->getOperand(0);
SDValue Cond1 = N0->getOperand(1);
- SDValue InnerSelect =
- DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
+ Cond1, N1, N2, Flags);
if (normalizeToSequence || !InnerSelect.use_empty())
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
- InnerSelect);
+ InnerSelect, Flags);
+ // Cleanup on failure.
+ if (InnerSelect.use_empty())
+ recursivelyDeleteUnusedNodes(InnerSelect.getNode());
}
// select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
@@ -7439,12 +8283,14 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// Create the actual and node if we can generate good code for it.
if (!normalizeToSequence) {
SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
- return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
+ N2, Flags);
}
// Otherwise see if we can optimize the "and" to a better pattern.
- if (SDValue Combined = visitANDLike(N0, N1_0, N))
+ if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
- N2);
+ N2, Flags);
+ }
}
}
// select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
@@ -7456,20 +8302,22 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// Create the actual or node if we can generate good code for it.
if (!normalizeToSequence) {
SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
- return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
+ N2_2, Flags);
}
// Otherwise see if we can optimize to a better pattern.
if (SDValue Combined = visitORLike(N0, N2_0, N))
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
- N2_2);
+ N2_2, Flags);
}
}
}
- if (VT0 == MVT::i1) {
- // select (not Cond), N1, N2 -> select Cond, N2, N1
- if (isBitwiseNot(N0))
- return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
+ // select (not Cond), N1, N2 -> select Cond, N2, N1
+ if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
+ SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
+ SelectOp->setFlags(Flags);
+ return SelectOp;
}
// Fold selects based on a setcc into other things, such as min/max/abs.
@@ -7481,7 +8329,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// select (fcmp gt x, y), x, y -> fmaxnum x, y
//
// This is OK if we don't care what happens if either operand is a NaN.
- if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2))
+ if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
CC, TLI, DAG))
return FMinMax;
@@ -7516,9 +8364,16 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
}
if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
- (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
- return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
- N0.getOperand(2));
+ (!LegalOperations &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
+ // Any flags available in a select/setcc fold will be on the setcc as they
+    // migrated from fcmp.
+ Flags = N0.getNode()->getFlags();
+ SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
+ N2, N0.getOperand(2));
+ SelectNode->setFlags(Flags);
+ return SelectNode;
+ }
return SimplifySelect(DL, N0, N1, N2);
}
@@ -7599,14 +8454,19 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
}
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
SDValue Mask = MSC->getMask();
- SDValue Data = MSC->getValue();
+ SDValue Data = MSC->getValue();
+ SDValue Chain = MSC->getChain();
SDLoc DL(N);
+ // Zap scatters with a zero mask.
+ if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+ return Chain;
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
// If the MSCATTER data type requires splitting and the mask is provided by a
// SETCC, then split both nodes and its operands before legalization. This
// prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -7624,8 +8484,6 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
- SDValue Chain = MSC->getChain();
-
EVT MemoryVT = MSC->getMemoryVT();
unsigned Alignment = MSC->getOriginalAlignment();
@@ -7658,15 +8516,20 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
}
SDValue DAGCombiner::visitMSTORE(SDNode *N) {
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
- MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
+ MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
- SDValue Data = MST->getValue();
+ SDValue Data = MST->getValue();
+ SDValue Chain = MST->getChain();
EVT VT = Data.getValueType();
SDLoc DL(N);
+ // Zap masked stores with a zero mask.
+ if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+ return Chain;
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
// If the MSTORE data type requires splitting and the mask is provided by a
// SETCC, then split both nodes and its operands before legalization. This
// prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -7680,17 +8543,11 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
SDValue MaskLo, MaskHi, Lo, Hi;
std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
- SDValue Chain = MST->getChain();
SDValue Ptr = MST->getBasePtr();
EVT MemoryVT = MST->getMemoryVT();
unsigned Alignment = MST->getOriginalAlignment();
- // if Alignment is equal to the vector size,
- // take the half of it for the second part
- unsigned SecondHalfAlignment =
- (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
-
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
@@ -7712,7 +8569,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
MMO = DAG.getMachineFunction().getMachineMemOperand(
MST->getPointerInfo().getWithOffset(HiOffset),
- MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
+ MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment,
MST->getAAInfo(), MST->getRanges());
Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
@@ -7728,13 +8585,17 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
}
SDValue DAGCombiner::visitMGATHER(SDNode *N) {
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
SDValue Mask = MGT->getMask();
SDLoc DL(N);
+ // Zap gathers with a zero mask.
+ if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+ return CombineTo(N, MGT->getPassThru(), MGT->getChain());
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
// If the MGATHER result requires splitting and the mask is provided by a
// SETCC, then split both nodes and its operands before legalization. This
// prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -7805,13 +8666,17 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
}
SDValue DAGCombiner::visitMLOAD(SDNode *N) {
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
- MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
+ MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
SDValue Mask = MLD->getMask();
SDLoc DL(N);
+ // Zap masked loads with a zero mask.
+ if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+ return CombineTo(N, MLD->getPassThru(), MLD->getChain());
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
// If the MLOAD result requires splitting and the mask is provided by a
// SETCC, then split both nodes and its operands before legalization. This
// prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -7839,12 +8704,6 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
EVT MemoryVT = MLD->getMemoryVT();
unsigned Alignment = MLD->getOriginalAlignment();
- // if Alignment is equal to the vector size,
- // take the half of it for the second part
- unsigned SecondHalfAlignment =
- (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
- Alignment/2 : Alignment;
-
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
@@ -7862,7 +8721,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
MMO = DAG.getMachineFunction().getMachineMemOperand(
MLD->getPointerInfo().getWithOffset(HiOffset),
- MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
+ MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment,
MLD->getAAInfo(), MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
@@ -7943,11 +8802,16 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
SDLoc DL(N);
if (SDValue V = DAG.simplifySelect(N0, N1, N2))
return V;
+ // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
+ if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
+ return DAG.getSelect(DL, VT, F, N2, N1);
+
// Canonicalize integer abs.
// vselect (setg[te] X, 0), X, -X ->
// vselect (setgt X, -1), X, -X ->
@@ -7987,11 +8851,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
// This is OK if we don't care about what happens if either operand is a
// NaN.
//
- EVT VT = N->getValueType(0);
- if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0), N0.getOperand(1))) {
- ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0),
+ N0.getOperand(1), TLI)) {
if (SDValue FMinMax = combineMinNumMaxNum(
- DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
+ DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
return FMinMax;
}
@@ -8080,9 +8943,11 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
return N2;
} else if (SCC.getOpcode() == ISD::SETCC) {
// Fold to a simpler select_cc
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
- SCC.getOperand(0), SCC.getOperand(1), N2, N3,
- SCC.getOperand(2));
+ SDValue SelectOp = DAG.getNode(
+ ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
+ SCC.getOperand(1), N2, N3, SCC.getOperand(2));
+ SelectOp->setFlags(SCC->getFlags());
+ return SelectOp;
}
}
@@ -8148,6 +9013,7 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
@@ -8158,7 +9024,33 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
// fold (zext c1) -> c1
// fold (aext c1) -> c1
if (isa<ConstantSDNode>(N0))
- return DAG.getNode(Opcode, SDLoc(N), VT, N0);
+ return DAG.getNode(Opcode, DL, VT, N0);
+
+ // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
+ // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
+ // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
+ if (N0->getOpcode() == ISD::SELECT) {
+ SDValue Op1 = N0->getOperand(1);
+ SDValue Op2 = N0->getOperand(2);
+ if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
+ (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
+ // For any_extend, choose sign extension of the constants to allow a
+      // possible further transform to sign_extend_inreg, i.e.:
+ //
+ // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
+ // t2: i64 = any_extend t1
+ // -->
+ // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
+ // -->
+ // t4: i64 = sign_extend_inreg t3
+ unsigned FoldOpc = Opcode;
+ if (FoldOpc == ISD::ANY_EXTEND)
+ FoldOpc = ISD::SIGN_EXTEND;
+ return DAG.getSelect(DL, VT, N0->getOperand(0),
+ DAG.getNode(FoldOpc, DL, VT, Op1),
+ DAG.getNode(FoldOpc, DL, VT, Op2));
+ }
+ }
// fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
// fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
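The select-of-constants fold added above extends each constant arm and hoists the extension over the select; for any_extend it deliberately picks sign extension so a later sign_extend_inreg can be folded away. A scalar model of the sext case (illustrative only; the concrete constants are hypothetical):

#include <cassert>
#include <cstdint>

int main() {
  const bool conds[] = {false, true};
  for (bool c : conds) {
    int8_t narrow = c ? int8_t(-1) : int8_t(0);            // select of i8 constants
    int64_t extendedSelect = int64_t(narrow);              // sext(select c, -1, 0)
    int64_t selectExtended = c ? int64_t(-1) : int64_t(0); // select c, sext -1, sext 0
    assert(extendedSelect == selectExtended);
  }
  return 0;
}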
@@ -8173,7 +9065,6 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
SmallVector<SDValue, 8> Elts;
unsigned NumElts = VT.getVectorNumElements();
- SDLoc DL(N);
// For zero-extensions, UNDEF elements still guarantee to have the upper
// bits set to zero.
@@ -8387,6 +9278,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
assert(N->getOpcode() == ISD::ZERO_EXTEND);
EVT VT = N->getValueType(0);
+ EVT OrigVT = N->getOperand(0).getValueType();
+ if (TLI.isZExtFree(OrigVT, VT))
+ return SDValue();
// and/or/xor
SDValue N0 = N->getOperand(0);
@@ -8450,6 +9344,10 @@ SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
Load->getValueType(0), ExtLoad);
CombineTo(Load, Trunc, ExtLoad.getValue(1));
}
+
+ // N0 is dead at this point.
+ recursivelyDeleteUnusedNodes(N0.getNode());
+
return SDValue(N,0); // Return N so it doesn't get rechecked!
}
@@ -8509,19 +9407,21 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
: ISD::isZEXTLoad(N0Node);
if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
!ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
- return {};
+ return SDValue();
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
!TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
- return {};
+ return SDValue();
SDValue ExtLoad =
DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
LN0->getBasePtr(), MemVT, LN0->getMemOperand());
Combiner.CombineTo(N, ExtLoad);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ if (LN0->use_empty())
+ Combiner.recursivelyDeleteUnusedNodes(LN0);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -8559,6 +9459,7 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
Combiner.CombineTo(N, ExtLoad);
if (NoReplaceTrunc) {
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ Combiner.recursivelyDeleteUnusedNodes(LN0);
} else {
SDValue Trunc =
DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
@@ -8804,6 +9705,25 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
+ // Eliminate this sign extend by doing a negation in the destination type:
+ // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
+ if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
+ isNullOrNullSplat(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
+ TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
+ SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
+ }
+ // Eliminate this sign extend by doing a decrement in the destination type:
+ // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
+ if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
+ isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
+ N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+ TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
+ SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
+ return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
+ }
+
return SDValue();
}
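Both new folds rewrite a sign extension of a value that is already zero-extended, so the arithmetic can be done directly in the wider type. A brute-force scalar check of the two identities for the i8/i32/i64 case from the comments (illustrative; the narrowing conversions assume the usual two's-complement behavior, guaranteed since C++20):

#include <cassert>
#include <cstdint>

int main() {
  for (int v = 0; v != 256; ++v) {
    uint8_t x = uint8_t(v);
    // sext64(0 - zext32(x)) == 0 - zext64(x)
    assert(int64_t(int32_t(0u - uint32_t(x))) == -int64_t(x));
    // sext64(zext32(x) + (-1)) == zext64(x) + (-1)
    assert(int64_t(int32_t(uint32_t(x) + 0xFFFFFFFFu)) == int64_t(x) - 1);
  }
  return 0;
}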
@@ -9061,14 +9981,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
N0.hasOneUse()) {
SDValue ShAmt = N0.getOperand(1);
- unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
if (N0.getOpcode() == ISD::SHL) {
SDValue InnerZExt = N0.getOperand(0);
// If the original shl may be shifting out bits, do not perform this
// transformation.
unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
InnerZExt.getOperand(0).getValueSizeInBits();
- if (ShAmtVal > KnownZeroBits)
+ if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
return SDValue();
}
@@ -9162,6 +10081,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
CombineTo(N, ExtLoad);
if (NoReplaceTrunc) {
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ recursivelyDeleteUnusedNodes(LN0);
} else {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
@@ -9185,6 +10105,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
MemVT, LN0->getMemOperand());
CombineTo(N, ExtLoad);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ recursivelyDeleteUnusedNodes(LN0);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -9574,14 +10495,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
// We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
if (N0.getOpcode() == ISD::SRL) {
- if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
- if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
+ if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if (ShAmt->getAPIntValue().ule(VTBits - EVTBits)) {
// We can turn this into an SRA iff the input to the SRL is already sign
// extended enough.
unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
- if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
- return DAG.getNode(ISD::SRA, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1));
+ if (((VTBits - EVTBits) - ShAmt->getZExtValue()) < InSignBits)
+ return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
+ N0.getOperand(1));
}
}
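The srl-to-sra rewrite is sound because, given enough known sign bits, the logical and arithmetic shifts agree on every bit the sign_extend_inreg keeps. An exhaustive-ish scalar check for the i32 case in the comment (shift by 23, i8 in-reg width; illustrative, assumes two's-complement shifts and conversions as on all mainstream targets):

#include <cassert>
#include <cstdint>

int main() {
  // The fold requires InSignBits > VTBits - (ShAmt + EVTBits) = 1, i.e. at
  // least two sign bits, which this value range guarantees.
  for (int64_t v = -(INT64_C(1) << 30); v < (INT64_C(1) << 30); v += 9973) {
    int32_t x = int32_t(v);
    int32_t viaSrl = int32_t(int8_t(uint32_t(x) >> 23)); // sext_in_reg (srl x, 23), i8
    int32_t viaSra = x >> 23;                            // sra x, 23
    assert(viaSrl == viaSra);
  }
  return 0;
}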
@@ -9667,10 +10588,11 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ EVT SrcVT = N0.getValueType();
bool isLE = DAG.getDataLayout().isLittleEndian();
// noop truncate
- if (N0.getValueType() == N->getValueType(0))
+ if (SrcVT == VT)
return N0;
// fold (truncate (truncate x)) -> (truncate x)
@@ -9740,7 +10662,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// trunc (select c, a, b) -> select c, (trunc a), (trunc b)
if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
- EVT SrcVT = N0.getValueType();
if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
TLI.isTruncateFree(SrcVT, VT)) {
SDLoc SL(N0);
@@ -9753,7 +10674,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
- (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
SDValue Amt = N0.getOperand(1);
KnownBits Known = DAG.computeKnownBits(Amt);
@@ -9771,6 +10692,19 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
+ // Attempt to pre-truncate BUILD_VECTOR sources.
+ if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
+ TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
+ SDLoc DL(N);
+ EVT SVT = VT.getScalarType();
+ SmallVector<SDValue, 8> TruncOps;
+ for (const SDValue &Op : N0->op_values()) {
+ SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
+ TruncOps.push_back(TruncOp);
+ }
+ return DAG.getBuildVector(VT, DL, TruncOps);
+ }
+
// Fold a series of buildvector, bitcast, and truncate if possible.
// For example fold
// (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
@@ -9906,7 +10840,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// When the adde's carry is not used.
if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
- (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
+      // We only do this for addcarry before operation legalization.
+ ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
+ TLI.isOperationLegal(N0.getOpcode(), VT))) {
SDLoc SL(N);
auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
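Truncating below an add-with-carry is safe when the carry output is dead because truncation distributes over modular addition. A one-line scalar demonstration (illustrative values):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t a = 0x123456789ABCDEF0u, b = 0x0FEDCBA987654321u;
  // trunc64->32(a + b) == trunc(a) + trunc(b) (mod 2^32); the carry-out is
  // exactly the information a dead ADDCARRY result would have held.
  assert(uint32_t(a + b) == uint32_t(uint32_t(a) + uint32_t(b)));
  return 0;
}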
@@ -10070,14 +11006,17 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
return DAG.getUNDEF(VT);
// If the input is a BUILD_VECTOR with all constant elements, fold this now.
- // Only do this before legalize types, since we might create an illegal
- // scalar type. Even if we knew we wouldn't create an illegal scalar type
- // we can only do this before legalize ops, since the target maybe
- // depending on the bitcast.
+ // Only do this before legalize types, unless both types are integer and the
+ // scalar type is legal. Only do this before legalize ops, since the target
+  // may be depending on the bitcast.
// First check to see if this is all constant.
- if (!LegalTypes &&
+ // TODO: Support FP bitcasts after legalize types.
+ if (VT.isVector() &&
+ (!LegalTypes ||
+ (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
+ TLI.isTypeLegal(VT.getVectorElementType()))) &&
N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
- VT.isVector() && cast<BuildVectorSDNode>(N0)->isConstant())
+ cast<BuildVectorSDNode>(N0)->isConstant())
return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
VT.getVectorElementType());
@@ -10113,18 +11052,14 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// as we assume software couldn't rely on the number of accesses of an
// illegal type.
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
- TLI.isOperationLegal(ISD::LOAD, VT)) &&
- TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
+ TLI.isOperationLegal(ISD::LOAD, VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- unsigned OrigAlign = LN0->getAlignment();
- bool Fast = false;
- if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
- LN0->getAddressSpace(), OrigAlign, &Fast) &&
- Fast) {
+ if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
+ *LN0->getMemOperand())) {
SDValue Load =
DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
- LN0->getPointerInfo(), OrigAlign,
+ LN0->getPointerInfo(), LN0->getAlignment(),
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
return Load;
@@ -11071,15 +12006,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
+ isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2)
return DAG.getNode(ISD::FSUB, DL, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations), Flags);
+ GetNegatedExpression(N1, DAG, LegalOperations,
+ ForCodeSize), Flags);
// fold (fadd (fneg A), B) -> (fsub B, A)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
+ isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2)
return DAG.getNode(ISD::FSUB, DL, VT, N1,
- GetNegatedExpression(N0, DAG, LegalOperations), Flags);
+ GetNegatedExpression(N0, DAG, LegalOperations,
+ ForCodeSize), Flags);
auto isFMulNegTwo = [](SDValue FMul) {
if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
@@ -11105,8 +12042,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// Selection pass has a hard time dealing with FP constants.
bool AllowNewConst = (Level < AfterLegalizeDAG);
- // If 'unsafe math' or nnan is enabled, fold lots of things.
- if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
+ // If nnan is enabled, fold lots of things.
+ if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
// If allowed, fold (fadd (fneg x), x) -> 0.0
if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
return DAG.getConstantFP(0.0, DL, VT);
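The fadd/fneg folds above need no fast-math flags because IEEE-754 defines subtraction as addition of the negated operand, so fadd(a, fneg(b)) and fsub(a, b) produce identical results; only the (fadd (fneg x), x) -> 0.0 fold needs nnan, since x = NaN or x = infinity would yield NaN rather than zero. A small sanity check of the flag-free identity (illustrative values):

#include <cassert>

int main() {
  const double vals[] = {0.0, -0.0, 1.5, -2.25, 1e308};
  for (double a : vals)
    for (double b : vals)
      assert(a + (-b) == a - b); // exact: fsub is defined as fadd of fneg
  return 0;
}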
@@ -11246,16 +12183,20 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (N0 == N1) {
// (fsub x, x) -> 0.0
- if (Options.UnsafeFPMath || Flags.hasNoNaNs())
+ if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
return DAG.getConstantFP(0.0f, DL, VT);
}
// (fsub -0.0, N1) -> -N1
+ // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
+ // FSUB does not specify the sign bit of a NaN. Also note that for
+ // the same reason, the inverse transform is not safe, unless fast math
+ // flags are in play.
if (N0CFP && N0CFP->isZero()) {
if (N0CFP->isNegative() ||
(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
- if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
- return GetNegatedExpression(N1, DAG, LegalOperations);
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
+ return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
}
@@ -11273,9 +12214,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
- if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
return DAG.getNode(ISD::FADD, DL, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations), Flags);
+ GetNegatedExpression(N1, DAG, LegalOperations,
+ ForCodeSize), Flags);
// FSUB -> FMA combines:
if (SDValue Fused = visitFSUBForFMACombine(N)) {
@@ -11319,7 +12261,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- if (Options.UnsafeFPMath ||
+ if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) ||
(Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
// fold (fmul A, 0) -> 0
if (N1CFP && N1CFP->isZero())
@@ -11361,14 +12303,18 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
return DAG.getNode(ISD::FNEG, DL, VT, N0);
// fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
+ ForCodeSize)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
+ ForCodeSize)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
return DAG.getNode(ISD::FMUL, DL, VT,
- GetNegatedExpression(N0, DAG, LegalOperations),
- GetNegatedExpression(N1, DAG, LegalOperations),
+ GetNegatedExpression(N0, DAG, LegalOperations,
+ ForCodeSize),
+ GetNegatedExpression(N1, DAG, LegalOperations,
+ ForCodeSize),
Flags);
}
}
@@ -11506,7 +12452,8 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
// fma (fneg x), K, y -> fma x -K, y
if (N0.getOpcode() == ISD::FNEG &&
(TLI.isOperationLegal(ISD::ConstantFP, VT) ||
- (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
+ (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
+ ForCodeSize)))) {
return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
}
@@ -11541,22 +12488,33 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is that the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
+ // TODO: Limit this transform based on optsize/minsize - it always creates at
+ // least 1 extra instruction. But the perf win may be substantial enough
+ // that only minsize should restrict this.
bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
const SDNodeFlags Flags = N->getFlags();
if (!UnsafeMath && !Flags.hasAllowReciprocal())
return SDValue();
- // Skip if current node is a reciprocal.
+ // Skip if current node is a reciprocal/fneg-reciprocal.
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- if (N0CFP && N0CFP->isExactlyValue(1.0))
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
+ if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
return SDValue();
// Exit early if the target does not want this transform or if there can't
// possibly be enough uses of the divisor to make the transform worthwhile.
SDValue N1 = N->getOperand(1);
unsigned MinUses = TLI.combineRepeatedFPDivisors();
- if (!MinUses || N1->use_size() < MinUses)
+
+ // For splat vectors, scale the number of uses by the splat factor. If we can
+ // convert the division into a scalar op, that will likely be much faster.
+ unsigned NumElts = 1;
+ EVT VT = N->getValueType(0);
+ if (VT.isVector() && DAG.isSplatValue(N1))
+ NumElts = VT.getVectorNumElements();
+
+ if (!MinUses || (N1->use_size() * NumElts) < MinUses)
return SDValue();
// Find all FDIV users of the same divisor.
@@ -11573,10 +12531,9 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
// Now that we have the actual number of divisor uses, make sure it meets
// the minimum threshold specified by the target.
- if (Users.size() < MinUses)
+ if ((Users.size() * NumElts) < MinUses)
return SDValue();
- EVT VT = N->getValueType(0);
SDLoc DL(N);
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
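The shape of the repeated-divisor transform: every FDIV user of the same divisor is rewritten as an FMUL by one shared reciprocal, trading N divisions for one division plus N multiplications. The results may differ in the last ulp, which is why allow-reciprocal/unsafe math gates it. A scalar sketch with hypothetical names:

#include <cstdio>

double before(double a, double b, double d) {
  return a / d + b / d;          // two FDIVs of the same divisor
}

double after(double a, double b, double d) {
  double recip = 1.0 / d;        // the single FDIV built above
  return a * recip + b * recip;  // the remaining FDIVs become FMULs
}

int main() {
  // Prints "5 5"; %g hides the last-ulp difference between the two forms.
  std::printf("%g %g\n", before(6.0, 9.0, 3.0), after(6.0, 9.0, 3.0));
  return 0;
}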
@@ -11619,6 +12576,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
+ if (SDValue V = combineRepeatedFPDivisors(N))
+ return V;
+
if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
if (N1CFP) {
@@ -11634,7 +12594,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// backend)... we should handle this gracefully after Legalize.
// TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
TLI.isOperationLegal(ISD::ConstantFP, VT) ||
- TLI.isFPImmLegal(Recip, VT)))
+ TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
return DAG.getNode(ISD::FMUL, DL, VT, N0,
DAG.getConstantFP(Recip, DL, VT), Flags);
}
@@ -11692,21 +12652,22 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
}
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
+ ForCodeSize)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
+ ForCodeSize)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
- GetNegatedExpression(N0, DAG, LegalOperations),
- GetNegatedExpression(N1, DAG, LegalOperations),
+ GetNegatedExpression(N0, DAG, LegalOperations,
+ ForCodeSize),
+ GetNegatedExpression(N1, DAG, LegalOperations,
+ ForCodeSize),
Flags);
}
}
- if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
- return CombineRepeatedDivisors;
-
return SDValue();
}
@@ -11838,18 +12799,24 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
}
- // Try to convert x ** (1/4) into square roots.
+ // Try to convert x ** (1/4) and x ** (3/4) into square roots.
// x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
// TODO: This could be extended (using a target hook) to handle smaller
// power-of-2 fractional exponents.
- if (ExponentC->getValueAPF().isExactlyValue(0.25)) {
+ bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
+ bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
+ if (ExponentIs025 || ExponentIs075) {
// pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
// pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
+ // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
+ // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
// For regular numbers, rounding may cause the results to differ.
// Therefore, we require { nsz ninf afn } for this transform.
// TODO: We could select out the special cases if we don't have nsz/ninf.
SDNodeFlags Flags = N->getFlags();
- if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() ||
+
+ // We only need no signed zeros for the 0.25 case.
+ if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
!Flags.hasApproximateFuncs())
return SDValue();
@@ -11859,13 +12826,17 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
// Assume that libcalls are the smallest code.
// TODO: This restriction should probably be lifted for vectors.
- if (DAG.getMachineFunction().getFunction().optForSize())
+ if (DAG.getMachineFunction().getFunction().hasOptSize())
return SDValue();
// pow(X, 0.25) --> sqrt(sqrt(X))
SDLoc DL(N);
SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
- return DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
+ SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
+ if (ExponentIs025)
+ return SqrtSqrt;
+ // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
+ return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
}
return SDValue();
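The exponent identities behind the new 0.75 handling, checked numerically (approximate under rounding, which is why the afn/ninf flags are required; the tolerance here is illustrative):

#include <cassert>
#include <cmath>

int main() {
  const double xs[] = {0.5, 2.0, 10.0, 12345.0};
  for (double x : xs) {
    // x^(1/4) == sqrt(sqrt(x)) and x^(3/4) == sqrt(x) * sqrt(sqrt(x))
    assert(std::fabs(std::pow(x, 0.25) - std::sqrt(std::sqrt(x))) < 1e-9);
    assert(std::fabs(std::pow(x, 0.75) -
                     std::sqrt(x) * std::sqrt(std::sqrt(x))) < 1e-9);
  }
  return 0;
}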
@@ -11911,6 +12882,10 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (N0.isUndef())
+ return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
// fold (sint_to_fp c1) -> c1fp
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
@@ -11968,6 +12943,10 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (N0.isUndef())
+ return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
// fold (uint_to_fp c1) -> c1fp
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
@@ -12051,6 +13030,10 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // fold (fp_to_sint undef) -> undef
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
// fold (fp_to_sint c1fp) -> c1
if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
@@ -12062,6 +13045,10 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // fold (fp_to_uint undef) -> undef
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
// fold (fp_to_uint c1fp) -> c1
if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
@@ -12250,8 +13237,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
- &DAG.getTarget().Options))
- return GetNegatedExpression(N0, DAG, LegalOperations);
+ &DAG.getTarget().Options, ForCodeSize))
+ return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
// Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
// constant pool values.
@@ -12287,7 +13274,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
APFloat CVal = CFP1->getValueAPF();
CVal.changeSign();
if (Level >= AfterLegalizeDAG &&
- (TLI.isFPImmLegal(CVal, VT) ||
+ (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
TLI.isOperationLegal(ISD::ConstantFP, VT)))
return DAG.getNode(
ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
@@ -12556,6 +13543,7 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
TargetLowering::AddrMode AM;
if (N->getOpcode() == ISD::ADD) {
+ AM.HasBaseReg = true;
ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (Offset)
// [reg +/- imm]
@@ -12564,6 +13552,7 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
// [reg +/- reg]
AM.Scale = 1;
} else if (N->getOpcode() == ISD::SUB) {
+ AM.HasBaseReg = true;
ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (Offset)
// [reg +/- imm]
@@ -12653,7 +13642,13 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// Check #2.
if (!isLoad) {
SDValue Val = cast<StoreSDNode>(N)->getValue();
- if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
+
+ // Would require a copy.
+ if (Val == BasePtr)
+ return false;
+
+ // Would create a cycle.
+ if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
return false;
}
@@ -13190,7 +14185,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
if (LD->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes.
- SDValue BetterChain = FindBetterChain(N, Chain);
+ SDValue BetterChain = FindBetterChain(LD, Chain);
// If there is a better chain.
if (Chain != BetterChain) {
@@ -13378,7 +14373,7 @@ struct LoadedSlice {
/// Get the alignment of the load used for this slice.
unsigned getAlignment() const {
unsigned Alignment = Origin->getAlignment();
- unsigned Offset = getOffsetFromBase();
+ uint64_t Offset = getOffsetFromBase();
if (Offset != 0)
Alignment = MinAlign(Alignment, Alignment + Offset);
return Alignment;
@@ -13500,9 +14495,11 @@ struct LoadedSlice {
assert(DAG && "Missing context");
const TargetLowering &TLI = DAG->getTargetLoweringInfo();
EVT ResVT = Use->getValueType(0);
- const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
+ const TargetRegisterClass *ResRC =
+ TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
const TargetRegisterClass *ArgRC =
- TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
+ TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
+ Use->getOperand(0)->isDivergent());
if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
return false;
@@ -13826,7 +14823,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
// For narrowing to be valid, it must be the case that the load is the
- // immediately preceeding memory operation before the store.
+ // immediately preceding memory operation before the store.
if (LD == Chain.getNode())
; // ok.
else if (Chain->getOpcode() == ISD::TokenFactor &&
@@ -14039,11 +15036,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
/// load / store operations if the target deems the transformation profitable.
SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
- SDValue Chain = ST->getChain();
SDValue Value = ST->getValue();
if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
- Value.hasOneUse() &&
- Chain == SDValue(Value.getNode(), 1)) {
+ Value.hasOneUse()) {
LoadSDNode *LD = cast<LoadSDNode>(Value);
EVT VT = LD->getMemoryVT();
if (!VT.isFloatingPoint() ||
@@ -14073,7 +15068,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
LD->getPointerInfo(), LDAlign);
SDValue NewST =
- DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
+ DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
ST->getPointerInfo(), STAlign);
AddToWorklist(NewLD.getNode());
@@ -14171,14 +15166,14 @@ SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
Visited.insert(StoreNodes[i].MemNode);
}
- // don't include nodes that are children
+ // don't include nodes that are children or repeated nodes.
for (unsigned i = 0; i < NumStores; ++i) {
- if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
+ if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
Chains.push_back(StoreNodes[i].MemNode->getChain());
}
assert(Chains.size() > 0 && "Chain should have generated a chain");
- return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
+ return DAG.getTokenFactor(StoreDL, Chains);
}
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
@@ -14372,15 +15367,19 @@ void DAGCombiner::getStoreMergeCandidates(
// Loads must only have one use.
if (!Ld->hasNUsesOfValue(1, 0))
return;
- // The memory operands must not be volatile.
+ // The memory operands must not be volatile/indexed.
if (Ld->isVolatile() || Ld->isIndexed())
return;
}
auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
int64_t &Offset) -> bool {
+ // The memory operands must not be volatile/indexed.
if (Other->isVolatile() || Other->isIndexed())
return false;
- SDValue Val = peekThroughBitcasts(Other->getValue());
+ // Don't mix temporal stores with non-temporal stores.
+ if (St->isNonTemporal() != Other->isNonTemporal())
+ return false;
+ SDValue OtherBC = peekThroughBitcasts(Other->getValue());
// Allow merging constants of different types as integers.
bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
: Other->getMemoryVT() != MemVT;
@@ -14388,16 +15387,19 @@ void DAGCombiner::getStoreMergeCandidates(
if (NoTypeMatch)
return false;
// The Load's Base Ptr must also match
- if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
- auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
+ if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
+ BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
if (LoadVT != OtherLd->getMemoryVT())
return false;
// Loads must only have one use.
if (!OtherLd->hasNUsesOfValue(1, 0))
return false;
- // The memory operands must not be volatile.
+ // The memory operands must not be volatile/indexed.
if (OtherLd->isVolatile() || OtherLd->isIndexed())
return false;
+ // Don't mix temporal loads with non-temporal loads.
+ if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
+ return false;
if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
return false;
} else
@@ -14406,17 +15408,17 @@ void DAGCombiner::getStoreMergeCandidates(
if (IsConstantSrc) {
if (NoTypeMatch)
return false;
- if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
+ if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
return false;
}
if (IsExtractVecSrc) {
// Do not merge truncated stores here.
if (Other->isTruncatingStore())
return false;
- if (!MemVT.bitsEq(Val.getValueType()))
+ if (!MemVT.bitsEq(OtherBC.getValueType()))
return false;
- if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
- Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
+ OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
}
Ptr = BaseIndexOffset::match(Other, DAG);
@@ -14441,9 +15443,11 @@ void DAGCombiner::getStoreMergeCandidates(
RootNode = St->getChain().getNode();
+ unsigned NumNodesExplored = 0;
if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
RootNode = Ldn->getChain().getNode();
- for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
+ for (auto I = RootNode->use_begin(), E = RootNode->use_end();
+ I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
if (I2.getOperandNo() == 0)
@@ -14454,7 +15458,8 @@ void DAGCombiner::getStoreMergeCandidates(
StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
}
} else
- for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
+ for (auto I = RootNode->use_begin(), E = RootNode->use_end();
+ I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
if (I.getOperandNo() == 0)
if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
BaseIndexOffset Ptr;
@@ -14551,6 +15556,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
isa<ConstantFPSDNode>(StoredVal);
bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
+ bool IsNonTemporalStore = St->isNonTemporal();
+ bool IsNonTemporalLoad =
+ IsLoadSrc && cast<LoadSDNode>(StoredVal)->isNonTemporal();
if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
return false;
@@ -14652,8 +15660,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFast) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFast) &&
IsFast) {
LastIntegerTrunc = false;
LastLegalType = i + 1;
@@ -14664,8 +15672,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFast) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(),
+ &IsFast) &&
IsFast) {
LastIntegerTrunc = true;
LastLegalType = i + 1;
@@ -14683,8 +15692,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
- FirstStoreAlign, &IsFast) &&
+ TLI.allowsMemoryAccess(
+ Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) &&
IsFast)
LastLegalVectorType = i + 1;
}
@@ -14755,8 +15764,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (TLI.isTypeLegal(Ty) &&
TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
- FirstStoreAlign, &IsFast) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty,
+ *FirstInChain->getMemOperand(), &IsFast) &&
IsFast)
NumStoresToMerge = i + 1;
}
@@ -14847,7 +15856,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
- unsigned FirstLoadAS = FirstLoad->getAddressSpace();
unsigned FirstLoadAlign = FirstLoad->getAlignment();
// Scan the memory operations on the chain and find the first
@@ -14887,11 +15895,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
bool IsFastSt, IsFastLd;
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFastSt) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
- FirstLoadAlign, &IsFastLd) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstLoad->getMemOperand(), &IsFastLd) &&
IsFastLd) {
LastLegalVectorType = i + 1;
}
@@ -14901,11 +15909,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
StoreTy = EVT::getIntegerVT(Context, SizeInBits);
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFastSt) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
- FirstLoadAlign, &IsFastLd) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstLoad->getMemOperand(), &IsFastLd) &&
IsFastLd) {
LastLegalIntegerType = i + 1;
DoIntegerTruncate = false;
@@ -14920,11 +15928,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
StoreTy) &&
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFastSt) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(),
+ &IsFastSt) &&
IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
- FirstLoadAlign, &IsFastLd) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstLoad->getMemOperand(), &IsFastLd) &&
IsFastLd) {
LastLegalIntegerType = i + 1;
DoIntegerTruncate = true;
@@ -14994,26 +16003,32 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
AddToWorklist(NewStoreChain.getNode());
- MachineMemOperand::Flags MMOFlags =
+ MachineMemOperand::Flags LdMMOFlags =
isDereferenceable ? MachineMemOperand::MODereferenceable
: MachineMemOperand::MONone;
+ if (IsNonTemporalLoad)
+ LdMMOFlags |= MachineMemOperand::MONonTemporal;
+
+ MachineMemOperand::Flags StMMOFlags =
+ IsNonTemporalStore ? MachineMemOperand::MONonTemporal
+ : MachineMemOperand::MONone;
SDValue NewLoad, NewStore;
if (UseVectorTy || !DoIntegerTruncate) {
NewLoad =
DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
- FirstLoadAlign, MMOFlags);
+ FirstLoadAlign, LdMMOFlags);
NewStore = DAG.getStore(
NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), FirstStoreAlign);
+ FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
} else { // This must be the truncstore/extload case
EVT ExtendedTy =
TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
FirstLoad->getChain(), FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), JointMemOpVT,
- FirstLoadAlign, MMOFlags);
+ FirstLoadAlign, LdMMOFlags);
NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
@@ -15168,16 +16183,11 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// illegal type.
if (((!LegalOperations && !ST->isVolatile()) ||
TLI.isOperationLegal(ISD::STORE, SVT)) &&
- TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
- unsigned OrigAlign = ST->getAlignment();
- bool Fast = false;
- if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
- ST->getAddressSpace(), OrigAlign, &Fast) &&
- Fast) {
- return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
- ST->getPointerInfo(), OrigAlign,
- ST->getMemOperand()->getFlags(), ST->getAAInfo());
- }
+ TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
+ DAG, *ST->getMemOperand())) {
+ return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
+ ST->getPointerInfo(), ST->getAlignment(),
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
}
}
@@ -15205,6 +16215,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (SDValue NewST = TransformFPLoadStorePair(N))
return NewST;
+ // Try transforming several stores into STORE (BSWAP).
+ if (SDValue Store = MatchStoreCombine(ST))
+ return Store;
+
if (ST->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes, on this store and any
// adjacent stores.
@@ -15221,23 +16235,22 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
Value.getValueType().isInteger() &&
(!isa<ConstantSDNode>(Value) ||
!cast<ConstantSDNode>(Value)->isOpaque())) {
+ APInt TruncDemandedBits =
+ APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
+ ST->getMemoryVT().getScalarSizeInBits());
+
// See if we can simplify the input to this truncstore with knowledge that
// only the low bits are being used. For example:
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
- SDValue Shorter = DAG.GetDemandedBits(
- Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
- ST->getMemoryVT().getScalarSizeInBits()));
+ SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits);
AddToWorklist(Value.getNode());
- if (Shorter.getNode())
- return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
- Ptr, ST->getMemoryVT(), ST->getMemOperand());
+ if (Shorter)
+ return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
+ ST->getMemOperand());
// Otherwise, see if we can simplify the operation with
// SimplifyDemandedBits, which only works if the value has a single use.
- if (SimplifyDemandedBits(
- Value,
- APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
- ST->getMemoryVT().getScalarSizeInBits()))) {
+ if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
// Re-visit the store if anything changed and the store hasn't been merged
// with another node (N is deleted) SimplifyDemandedBits will add Value's
// node back to the worklist if necessary, but we also need to re-visit
@@ -15263,25 +16276,55 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
- !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
- ST->getMemoryVT() == ST1->getMemoryVT()) {
- // If this is a store followed by a store with the same value to the same
- // location, then the store is dead/noop.
- if (ST1->getValue() == Value) {
- // The store is dead, remove it.
+ !ST1->isVolatile()) {
+ if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
+ ST->getMemoryVT() == ST1->getMemoryVT()) {
+ // If this is a store followed by a store with the same value to the
+ // same location, then the store is dead/noop.
return Chain;
}
- // If this is a store who's preceeding store to the same location
- // and no one other node is chained to that store we can effectively
- // drop the store. Do not remove stores to undef as they may be used as
- // data sinks.
if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
!ST1->getBasePtr().isUndef()) {
- // ST1 is fully overwritten and can be elided. Combine with it's chain
- // value.
- CombineTo(ST1, ST1->getChain());
- return SDValue();
+ const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
+ const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
+ unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
+ unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
+      // If this store's preceding store writes to a subset of the current
+      // location and no other node is chained to that store, we can
+ // effectively drop the store. Do not remove stores to undef as they may
+ // be used as data sinks.
+ if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
+ CombineTo(ST1, ST1->getChain());
+ return SDValue();
+ }
+
+ // If ST stores to a subset of preceding store's write set, we may be
+ // able to fold ST's value into the preceding stored value. As we know
+ // the other uses of ST1's chain are unconcerned with ST, this folding
+ // will not affect those nodes.
+ int64_t BitOffset;
+ if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize,
+ BitOffset)) {
+ SDValue ChainValue = ST1->getValue();
+ if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) {
+ if (auto *C = dyn_cast<ConstantSDNode>(Value)) {
+ APInt Val = C1->getAPIntValue();
+ APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize);
+ // FIXME: Handle Big-endian mode.
+ if (!DAG.getDataLayout().isBigEndian()) {
+ Val.insertBits(InsertVal, BitOffset);
+ SDValue NewSDVal =
+ DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(),
+ C1->isTargetOpcode(), C1->isOpaque());
+ SDNode *NewST1 = DAG.UpdateNodeOperands(
+ ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2),
+ ST1->getOperand(3));
+ return CombineTo(ST, SDValue(NewST1, 0));
+ }
+ }
+ }
+ } // End ST subset of ST1 case.
}
}
}
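The constant-folding path above splices the narrow store's bits into the wider preceding constant (APInt::insertBits in the real code) so the narrow store can be dropped. A little-endian scalar model of the bit surgery (hypothetical values; big-endian is explicitly left unhandled by the FIXME):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t wideVal = 0x1122334455667788u; // value of the preceding store ST1
  uint8_t narrowVal = 0xAB;               // value of the current store ST
  unsigned bitOffset = 16;                // ST writes 2 bytes into ST1's range
  uint64_t mask = uint64_t(0xFF) << bitOffset;
  uint64_t folded = (wideVal & ~mask) | (uint64_t(narrowVal) << bitOffset);
  assert(folded == 0x1122334455AB7788u);  // byte at offset 2 replaced
  return 0;
}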
@@ -15299,7 +16342,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Always perform this optimization before types are legal. If the target
// prefers, also try this after legalization to catch stores that were created
// by intrinsics or other nodes.
- if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
+ if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
while (true) {
// There can be multiple store sequences on the same chain.
// Keep trying to merge store sequences until we are unable to do so
@@ -15333,6 +16376,54 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return ReduceLoadOpStoreWidth(N);
}
+SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
+ const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
+ if (!LifetimeEnd->hasOffset())
+ return SDValue();
+
+ const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
+ LifetimeEnd->getOffset(), false);
+
+ // We walk up the chains to find stores.
+ SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
+ while (!Chains.empty()) {
+ SDValue Chain = Chains.back();
+ Chains.pop_back();
+ if (!Chain.hasOneUse())
+ continue;
+ switch (Chain.getOpcode()) {
+ case ISD::TokenFactor:
+ for (unsigned Nops = Chain.getNumOperands(); Nops;)
+ Chains.push_back(Chain.getOperand(--Nops));
+ break;
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
+ // We can forward past any lifetime start/end that can be proven not to
+ // alias the node.
+ if (!isAlias(Chain.getNode(), N))
+ Chains.push_back(Chain.getOperand(0));
+ break;
+ case ISD::STORE: {
+      StoreSDNode *ST = cast<StoreSDNode>(Chain);
+ if (ST->isVolatile() || ST->isIndexed())
+ continue;
+ const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
+ // If we store purely within object bounds just before its lifetime ends,
+ // we can remove the store.
+ if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
+ ST->getMemoryVT().getStoreSizeInBits())) {
+ LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
+ dbgs() << "\nwithin LIFETIME_END of : ";
+ LifetimeEndBase.dump(); dbgs() << "\n");
+ CombineTo(ST, ST->getChain());
+ return SDValue(N, 0);
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
/// For the instruction sequence of store below, F and I values
/// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
@@ -15616,7 +16707,9 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
Offset = DAG.getNode(
ISD::MUL, DL, PtrType, Offset,
DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
- MPI = OriginalLoad->getPointerInfo();
+ // Discard the pointer info except the address space because the memory
+ // operand can't represent this new access since the offset is variable.
+ MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
}
NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
@@ -15668,14 +16761,15 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
/// the math/logic after an extract element of a vector.
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
bool LegalOperations) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Vec = ExtElt->getOperand(0);
SDValue Index = ExtElt->getOperand(1);
auto *IndexC = dyn_cast<ConstantSDNode>(Index);
- if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse())
+ if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
+ Vec.getNode()->getNumValues() != 1)
return SDValue();
// Targets may want to avoid this to prevent an expensive register transfer.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.shouldScalarizeBinop(Vec))
return SDValue();
@@ -16073,7 +17167,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask,
SDValue VecIn1, SDValue VecIn2,
- unsigned LeftIdx) {
+ unsigned LeftIdx, bool DidSplitVec) {
MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
@@ -16081,17 +17175,12 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
EVT InVT1 = VecIn1.getValueType();
EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
- unsigned Vec2Offset = 0;
unsigned NumElems = VT.getVectorNumElements();
unsigned ShuffleNumElems = NumElems;
- // In case both the input vectors are extracted from same base
- // vector we do not need extra addend (Vec2Offset) while
- // computing shuffle mask.
- if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
- !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
- !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
- Vec2Offset = InVT1.getVectorNumElements();
+ // If we artificially split a vector in two already, then the offsets in the
+ // operands will all be based off of VecIn1, even those in VecIn2.
+ unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
// We can't generate a shuffle node with mismatched input and output types.
// Try to make the types match the type of the output.
@@ -16214,23 +17303,29 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
// The build vector contains some number of undef elements and exactly
// one other element. That other element must be a zero-extended scalar
// extracted from a vector at a constant index to turn this into a shuffle.
+ // Also, require that the build vector does not implicitly truncate/extend
+ // its elements.
// TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
+ EVT VT = BV->getValueType(0);
SDValue Zext = BV->getOperand(ZextElt);
if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)))
+ !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
+ Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
return SDValue();
- // The zero-extend must be a multiple of the source size.
+ // The zero-extend must be a multiple of the source size, and we must be
+ // building a vector of the same size as the source of the extract element.
SDValue Extract = Zext.getOperand(0);
unsigned DestSize = Zext.getValueSizeInBits();
unsigned SrcSize = Extract.getValueSizeInBits();
- if (DestSize % SrcSize != 0)
+ if (DestSize % SrcSize != 0 ||
+ Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
return SDValue();
// Create a shuffle mask that will combine the extracted element with zeros
// and undefs.
int ZextRatio = DestSize / SrcSize;
int NumMaskElts = NumBVOps * ZextRatio;
SmallVector<int, 32> ShufMask(NumMaskElts, -1);
for (int i = 0; i != NumMaskElts; ++i) {
@@ -16260,7 +17355,7 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
ShufMask);
- return DAG.getBitcast(BV->getValueType(0), Shuf);
+ return DAG.getBitcast(VT, Shuf);
}
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
@@ -16316,7 +17411,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
return SDValue();
SDValue ExtractedFromVec = Op.getOperand(0);
- APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
+ const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
return SDValue();
@@ -16344,6 +17439,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
// vector, then split the vector efficiently based on the maximum
// vector access index and adjust the VectorMask and
// VecIn accordingly.
+ bool DidSplitVec = false;
if (VecIn.size() == 2) {
unsigned MaxIndex = 0;
unsigned NearestPow2 = 0;
@@ -16374,6 +17470,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
VecIn.pop_back();
VecIn.push_back(VecIn1);
VecIn.push_back(VecIn2);
+ DidSplitVec = true;
for (unsigned i = 0; i < NumElems; i++) {
if (VectorMask[i] <= 0)
@@ -16411,7 +17508,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
(LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
- VecRight, LeftIdx))
+ VecRight, LeftIdx, DidSplitVec))
Shuffles.push_back(Shuffle);
else
return SDValue();
@@ -16477,18 +17574,20 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
// Try to turn a build vector of zero extends of extract vector elts into a
// a vector zero extend and possibly an extract subvector.
-// TODO: Support sign extend or any extend?
+// TODO: Support sign extend?
// TODO: Allow undef elements?
-// TODO: Don't require the extracts to start at element 0.
SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
if (LegalOperations)
return SDValue();
EVT VT = N->getValueType(0);
+ bool FoundZeroExtend = false;
SDValue Op0 = N->getOperand(0);
auto checkElem = [&](SDValue Op) -> int64_t {
- if (Op.getOpcode() == ISD::ZERO_EXTEND &&
+ unsigned Opc = Op.getOpcode();
+ FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
+ if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
@@ -16520,7 +17619,8 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
SDLoc DL(N);
In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
Op0.getOperand(0).getOperand(1));
- return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
+ return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
+ VT, In);
}
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
@@ -16885,14 +17985,14 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return SDValue();
}
- unsigned IdentityIndex = i * PartNumElem;
- ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ auto *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
// The extract index must be constant.
if (!CS)
return SDValue();
// Check that we are reading from the identity index.
- if (CS->getZExtValue() != IdentityIndex)
+ unsigned IdentityIndex = i * PartNumElem;
+ if (CS->getAPIntValue() != IdentityIndex)
return SDValue();
}
@@ -16902,12 +18002,59 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return SDValue();
}
+static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
+ SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue BinOp = Extract->getOperand(0);
+ unsigned BinOpcode = BinOp.getOpcode();
+ if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
+ return SDValue();
+
+ SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
+ SDValue Index = Extract->getOperand(1);
+ EVT VT = Extract->getValueType(0);
+
+ // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
+ // if the source subvector is the same type as the one being extracted.
+ auto GetSubVector = [VT, Index](SDValue V) -> SDValue {
+ if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ V.getOperand(1).getValueType() == VT && V.getOperand(2) == Index) {
+ return V.getOperand(1);
+ }
+ auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+ if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
+ V.getOperand(0).getValueType() == VT &&
+ (IndexC->getZExtValue() % VT.getVectorNumElements()) == 0) {
+ uint64_t SubIdx = IndexC->getZExtValue() / VT.getVectorNumElements();
+ return V.getOperand(SubIdx);
+ }
+ return SDValue();
+ };
+ SDValue Sub0 = GetSubVector(Bop0);
+ SDValue Sub1 = GetSubVector(Bop1);
+
+ // TODO: We could handle the case where only 1 operand is being inserted by
+ // creating an extract of the other operand, but that requires checking
+ // number of uses and/or costs.
+ if (!Sub0 || !Sub1 || !TLI.isOperationLegalOrCustom(BinOpcode, VT))
+ return SDValue();
+
+ // We are inserting both operands of the wide binop only to extract back
+ // to the narrow vector size. Eliminate all of the insert/extract:
+ // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
+ return DAG.getNode(BinOpcode, SDLoc(Extract), VT, Sub0, Sub1,
+ BinOp->getFlags());
+}
+
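The new helper removes a matched insert/extract pair around a binop: when both operands were built by inserting same-typed subvectors at the extract index, the narrow binop on those subvectors produces the same lanes. A scalar model using one-element "subvectors" (illustrative only):

#include <array>
#include <cassert>

int main() {
  std::array<int, 4> a{1, 2, 3, 4}, b{10, 20, 30, 40};
  const int x = 7, y = 8, idx = 2; // subvectors inserted at the same index
  a[idx] = x;                      // ins ?, X, Index
  b[idx] = y;                      // ins ?, Y, Index
  std::array<int, 4> sum{};
  for (int i = 0; i != 4; ++i)
    sum[i] = a[i] + b[i];          // wide binop
  assert(sum[idx] == x + y);       // ext (binop (ins, ins)), Index == binop X, Y
  return 0;
}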
/// If we are extracting a subvector produced by a wide binary operator try
/// to use a narrow binary operator and/or avoid concatenation and extraction.
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
// some of these bailouts with other transforms.
+ if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
+ return V;
+
// The extract index must be a constant, so we can map it to a concat operand.
auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
if (!ExtractIndexC)
@@ -16915,8 +18062,10 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// We are looking for an optionally bitcasted wide vector binary operator
// feeding an extract subvector.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
- if (!ISD::isBinaryOp(BinOp.getNode()))
+ unsigned BOpcode = BinOp.getOpcode();
+ if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
return SDValue();
// The binop must be a vector type, so we can extract some fraction of it.
@@ -16945,8 +18094,6 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// Bail out if the target does not support a narrower version of the binop.
EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
WideNumElts / NarrowingRatio);
- unsigned BOpcode = BinOp.getOpcode();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
return SDValue();
@@ -16986,35 +18133,35 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// We need at least one concatenation operation of a binop operand to make
// this transform worthwhile. The concat must double the input vector sizes.
- // TODO: Should we also handle INSERT_SUBVECTOR patterns?
- SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0));
- SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1));
- bool ConcatL =
- LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
- bool ConcatR =
- RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
- if (!ConcatL && !ConcatR)
+ auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
+ if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
+ return V.getOperand(ConcatOpNum);
return SDValue();
+ };
+ SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
+ SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
+
+ if (SubVecL || SubVecR) {
+ // If a binop operand was not the result of a concat, we must extract a
+ // half-sized operand for our new narrow binop:
+ // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
+ // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
+ // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
+ SDLoc DL(Extract);
+ SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
+ SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
+ : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(0), IndexC);
- // If one of the binop operands was not the result of a concat, we must
- // extract a half-sized operand for our new narrow binop.
- SDLoc DL(Extract);
-
- // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
- // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
- // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
- SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
- : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
- BinOp.getOperand(0),
- DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
+ SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
+ : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(1), IndexC);
- SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
- : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
- BinOp.getOperand(1),
- DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
+ SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
+ return DAG.getBitcast(VT, NarrowBinOp);
+ }
- SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
- return DAG.getBitcast(VT, NarrowBinOp);
+ return SDValue();
}
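The payoff of this narrowing can be checked on plain arrays. A minimal sketch (ordinary C++, not SelectionDAG) modeling extract (add (concat X1, X2), (concat Y1, Y2)), 4 --> add X2, Y2 for 4-element subvectors:

#include <array>
#include <cassert>

int main() {
  std::array<int, 4> X1{1, 2, 3, 4}, X2{5, 6, 7, 8};
  std::array<int, 4> Y1{9, 10, 11, 12}, Y2{13, 14, 15, 16};

  // Wide form: concatenate, add all 8 lanes, then extract the high half.
  std::array<int, 8> WideSum;
  for (int i = 0; i != 8; ++i)
    WideSum[i] = (i < 4 ? X1[i] : X2[i - 4]) + (i < 4 ? Y1[i] : Y2[i - 4]);

  // Narrow form: add only the 4 lanes that survive the extract.
  std::array<int, 4> NarrowSum;
  for (int i = 0; i != 4; ++i)
    NarrowSum[i] = X2[i] + Y2[i];

  for (int i = 0; i != 4; ++i)
    assert(WideSum[4 + i] == NarrowSum[i]);
  return 0;
}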
/// If we are extracting a subvector from a wide vector load, convert to a
@@ -17052,7 +18199,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
return NewLd;
}
-SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
+SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
EVT NVT = N->getValueType(0);
SDValue V = N->getOperand(0);
@@ -17064,14 +18211,51 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
return NarrowLoad;
+ // Combine an extract of an extract into a single extract_subvector.
+ // ext (ext X, C), 0 --> ext X, C
+ SDValue Index = N->getOperand(1);
+ if (isNullConstant(Index) && V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ V.hasOneUse() && isa<ConstantSDNode>(V.getOperand(1))) {
+ if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
+ V.getConstantOperandVal(1)) &&
+ TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
+ V.getOperand(1));
+ }
+ }
+
+ // Try to move vector bitcast after extract_subv by scaling extraction index:
+ // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
+ if (isa<ConstantSDNode>(Index) && V.getOpcode() == ISD::BITCAST &&
+ V.getOperand(0).getValueType().isVector()) {
+ SDValue SrcOp = V.getOperand(0);
+ EVT SrcVT = SrcOp.getValueType();
+ unsigned SrcNumElts = SrcVT.getVectorNumElements();
+ unsigned DestNumElts = V.getValueType().getVectorNumElements();
+ if ((SrcNumElts % DestNumElts) == 0) {
+ unsigned SrcDestRatio = SrcNumElts / DestNumElts;
+ unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio;
+ EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
+ NewExtNumElts);
+ if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
+ unsigned IndexValScaled = N->getConstantOperandVal(1) * SrcDestRatio;
+ SDLoc DL(N);
+ SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
+ SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
+ V.getOperand(0), NewIndex);
+ return DAG.getBitcast(NVT, NewExtract);
+ }
+ }
+ // TODO - handle (DestNumElts % SrcNumElts) == 0
+ }
+
// Combine:
// (extract_subvec (concat V1, V2, ...), i)
// Into:
// Vi if possible
// Only operand 0 is checked as 'concat' assumes all inputs of the same
// type.
- if (V.getOpcode() == ISD::CONCAT_VECTORS &&
- isa<ConstantSDNode>(N->getOperand(1)) &&
+ if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index) &&
V.getOperand(0).getValueType() == NVT) {
unsigned Idx = N->getConstantOperandVal(1);
unsigned NumElems = NVT.getVectorNumElements();
@@ -17084,7 +18268,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// If the input is a build vector. Try to make a smaller build vector.
if (V.getOpcode() == ISD::BUILD_VECTOR) {
- if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ if (auto *IdxC = dyn_cast<ConstantSDNode>(Index)) {
EVT InVT = V.getValueType();
unsigned ExtractSize = NVT.getSizeInBits();
unsigned EltSize = InVT.getScalarSizeInBits();
@@ -17092,26 +18276,27 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
if (ExtractSize % EltSize == 0) {
unsigned NumElems = ExtractSize / EltSize;
EVT EltVT = InVT.getVectorElementType();
- EVT ExtractVT = NumElems == 1 ? EltVT :
- EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
+ EVT ExtractVT = NumElems == 1 ? EltVT
+ : EVT::getVectorVT(*DAG.getContext(),
+ EltVT, NumElems);
if ((Level < AfterLegalizeDAG ||
(NumElems == 1 ||
TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
(!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
- unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
- EltSize;
+ unsigned IdxVal = IdxC->getZExtValue();
+ IdxVal *= NVT.getScalarSizeInBits();
+ IdxVal /= EltSize;
+
if (NumElems == 1) {
SDValue Src = V->getOperand(IdxVal);
if (EltVT != Src.getValueType())
Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
-
return DAG.getBitcast(NVT, Src);
}
// Extract the pieces from the original build_vector.
- SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
- makeArrayRef(V->op_begin() + IdxVal,
- NumElems));
+ SDValue BuildVec = DAG.getBuildVector(
+ ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems));
return DAG.getBitcast(NVT, BuildVec);
}
}
@@ -17126,9 +18311,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
return SDValue();
// Only handle cases where both indexes are constants.
- auto *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ auto *ExtIdx = dyn_cast<ConstantSDNode>(Index);
auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
-
if (InsIdx && ExtIdx) {
// Combine:
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
@@ -17141,7 +18325,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
- N->getOperand(1));
+ Index);
}
}
@@ -17154,6 +18338,53 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
return SDValue();
}
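The bitcast-reordering fold in this visitor scales the extract index by SrcNumElts / DestNumElts. A standalone byte-level check of that scaling rule (plain C++, not SelectionDAG; a v8i16 viewed as v4i32 is an illustrative choice, giving SrcDestRatio = 2):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint16_t Src[8] = {0, 1, 2, 3, 4, 5, 6, 7};       // v8i16 source
  unsigned SrcNumElts = 8, DestNumElts = 4;         // bitcast result: v4i32
  unsigned SrcDestRatio = SrcNumElts / DestNumElts; // 2

  unsigned ExtIdx = 2;                     // extract v2i32 at index 2 ...
  unsigned NewIdx = ExtIdx * SrcDestRatio; // ... equals v4i16 at index 4

  // Both forms name the same 8 bytes of the source, so the results match
  // bit-for-bit regardless of endianness.
  uint32_t FromBitcast[2], FromScaledExtract[2];
  std::memcpy(FromBitcast, reinterpret_cast<const char *>(Src) + ExtIdx * 4, 8);
  std::memcpy(FromScaledExtract, Src + NewIdx, 8);
  assert(std::memcmp(FromBitcast, FromScaledExtract, 8) == 0);
  return 0;
}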
+/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
+/// followed by concatenation. Narrow vector ops may have better performance
+/// than wide ops, and this can unlock further narrowing of other vector ops.
+/// Targets can invert this transform later if it is not profitable.
+static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
+ SelectionDAG &DAG) {
+ SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
+ if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
+ N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
+ !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
+ return SDValue();
+
+ // Split the wide shuffle mask into halves. Any mask element that is accessing
+ // operand 1 is offset down to account for narrowing of the vectors.
+ ArrayRef<int> Mask = Shuf->getMask();
+ EVT VT = Shuf->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned HalfNumElts = NumElts / 2;
+ SmallVector<int, 16> Mask0(HalfNumElts, -1);
+ SmallVector<int, 16> Mask1(HalfNumElts, -1);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (Mask[i] == -1)
+ continue;
+ int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
+ if (i < HalfNumElts)
+ Mask0[i] = M;
+ else
+ Mask1[i - HalfNumElts] = M;
+ }
+
+ // Ask the target if this is a valid transform.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
+ HalfNumElts);
+ if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
+ !TLI.isShuffleMaskLegal(Mask1, HalfVT))
+ return SDValue();
+
+ // shuffle (concat X, undef), (concat Y, undef), Mask -->
+ // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
+ SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
+ SDLoc DL(Shuf);
+ SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
+ SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
+}
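The mask split in foldShuffleOfConcatUndefs can be exercised standalone. A minimal sketch (plain C++; the interleave mask is an illustrative choice) showing why elements addressing operand 1 are offset down by HalfNumElts once Y moves from lane NumElts to lane NumElts / 2:

#include <cassert>
#include <vector>

int main() {
  // shuffle (concat X, undef), (concat Y, undef) with 8 wide lanes:
  // lanes 0..3 come from X, lanes 8..11 come from Y.
  std::vector<int> Mask = {0, 8, 1, 9, 2, 10, 3, 11};
  unsigned NumElts = Mask.size(), HalfNumElts = NumElts / 2;
  std::vector<int> Mask0(HalfNumElts, -1), Mask1(HalfNumElts, -1);
  for (unsigned i = 0; i != NumElts; ++i) {
    if (Mask[i] == -1)
      continue;
    int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
    if (i < HalfNumElts)
      Mask0[i] = M;
    else
      Mask1[i - HalfNumElts] = M;
  }
  assert((Mask0 == std::vector<int>{0, 4, 1, 5})); // interleave low halves
  assert((Mask1 == std::vector<int>{2, 6, 3, 7})); // interleave high halves
  return 0;
}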
+
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or turn a shuffle of a single concat into simpler shuffle then concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
@@ -17163,20 +18394,24 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ ArrayRef<int> Mask = SVN->getMask();
SmallVector<SDValue, 4> Ops;
EVT ConcatVT = N0.getOperand(0).getValueType();
unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
unsigned NumConcats = NumElts / NumElemsPerConcat;
+ auto IsUndefMaskElt = [](int i) { return i == -1; };
+
// Special case: shuffle(concat(A,B)) can be more efficiently represented
// as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
// half vector elements.
if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
- std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
- SVN->getMask().end(), [](int i) { return i == -1; })) {
- N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
- makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
+ llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
+ IsUndefMaskElt)) {
+ N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
+ N0.getOperand(1),
+ Mask.slice(0, NumElemsPerConcat));
N1 = DAG.getUNDEF(ConcatVT);
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
}
@@ -17184,35 +18419,32 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
// Look at every vector that's inserted. We're looking for exact
// subvector-sized copies from a concatenated vector
for (unsigned I = 0; I != NumConcats; ++I) {
- // Make sure we're dealing with a copy.
unsigned Begin = I * NumElemsPerConcat;
- bool AllUndef = true, NoUndef = true;
- for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
- if (SVN->getMaskElt(J) >= 0)
- AllUndef = false;
- else
- NoUndef = false;
+ ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
+
+ // Make sure we're dealing with a copy.
+ if (llvm::all_of(SubMask, IsUndefMaskElt)) {
+ Ops.push_back(DAG.getUNDEF(ConcatVT));
+ continue;
}
- if (NoUndef) {
- if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
+ int OpIdx = -1;
+ for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
+ if (IsUndefMaskElt(SubMask[i]))
+ continue;
+ if ((SubMask[i] % (int)NumElemsPerConcat) != i)
return SDValue();
-
- for (unsigned J = 1; J != NumElemsPerConcat; ++J)
- if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
- return SDValue();
-
- unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
- if (FirstElt < N0.getNumOperands())
- Ops.push_back(N0.getOperand(FirstElt));
- else
- Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
-
- } else if (AllUndef) {
- Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
- } else { // Mixed with general masks and undefs, can't do optimization.
- return SDValue();
+ int EltOpIdx = SubMask[i] / NumElemsPerConcat;
+ if (0 <= OpIdx && EltOpIdx != OpIdx)
+ return SDValue();
+ OpIdx = EltOpIdx;
}
+ assert(0 <= OpIdx && "Unknown concat_vectors op");
+
+ if (OpIdx < (int)N0.getNumOperands())
+ Ops.push_back(N0.getOperand(OpIdx));
+ else
+ Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
}
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
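The rewritten chunk test above can be modeled on a bare mask array. A standalone sketch (plain C++, not SelectionDAG; chunkCopySource is an illustrative name, and unlike the real loop it does not special-case an all-undef chunk, which the code above turns into an UNDEF operand):

#include <cassert>

// Returns the source subvector index for this chunk of the mask, or -1 if
// the chunk is not a clean copy. NumElemsPerConcat is the subvector width.
int chunkCopySource(const int *SubMask, int NumElemsPerConcat) {
  int OpIdx = -1;
  for (int i = 0; i != NumElemsPerConcat; ++i) {
    if (SubMask[i] == -1)
      continue; // undef lanes match anything
    if (SubMask[i] % NumElemsPerConcat != i)
      return -1; // element is out of place within its subvector
    int EltOpIdx = SubMask[i] / NumElemsPerConcat;
    if (OpIdx >= 0 && EltOpIdx != OpIdx)
      return -1; // lanes drawn from two different subvectors
    OpIdx = EltOpIdx;
  }
  return OpIdx;
}

int main() {
  int Copy[] = {4, 5, -1, 7}; // subvector 1, with one undef lane
  int Mixed[] = {4, 1, 6, 7}; // lane 1 comes from subvector 0
  assert(chunkCopySource(Copy, 4) == 1);
  assert(chunkCopySource(Mixed, 4) == -1);
  return 0;
}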
@@ -17278,8 +18510,8 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
if (S.getOpcode() == ISD::BUILD_VECTOR) {
Op = S.getOperand(Idx);
} else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
- Op = S.getOperand(0);
+ SDValue Op0 = S.getOperand(0);
+ Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
} else {
// Operand can't be combined - bail out.
return SDValue();
@@ -17433,11 +18665,17 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
// If splat-mask contains undef elements, we need to be careful about
// introducing undef's in the folded mask which are not the result of composing
// the masks of the shuffles.
-static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
- ShuffleVectorSDNode *Splat,
- SelectionDAG &DAG) {
+static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
+ SelectionDAG &DAG) {
+ if (!Shuf->getOperand(1).isUndef())
+ return SDValue();
+ auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
+ if (!Splat || !Splat->isSplat())
+ return SDValue();
+
+ ArrayRef<int> ShufMask = Shuf->getMask();
ArrayRef<int> SplatMask = Splat->getMask();
- assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
+ assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
// Prefer simplifying to the splat-shuffle, if possible. This is legal if
// every undef mask element in the splat-shuffle has a corresponding undef
@@ -17463,13 +18701,13 @@ static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
return false;
return true;
};
- if (CanSimplifyToExistingSplat(UserMask, SplatMask))
- return SDValue(Splat, 0);
+ if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
+ return Shuf->getOperand(0);
// Create a new shuffle with a mask that is composed of the two shuffles'
// masks.
SmallVector<int, 32> NewMask;
- for (int Idx : UserMask)
+ for (int Idx : ShufMask)
NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
@@ -17555,6 +18793,34 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
Op1, Op0.getOperand(1), NewInsIndex);
}
+/// If we have a unary shuffle of a shuffle, see if it can be folded away
+/// completely. This has the potential to lose undef knowledge because the first
+/// shuffle may not have an undef mask element where the second one does. So
+/// only call this after doing simplifications based on demanded elements.
+static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
+ // shuf (shuf0 X, Y, Mask0), undef, Mask
+ auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
+ if (!Shuf0 || !Shuf->getOperand(1).isUndef())
+ return SDValue();
+
+ ArrayRef<int> Mask = Shuf->getMask();
+ ArrayRef<int> Mask0 = Shuf0->getMask();
+ for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
+ // Ignore undef elements.
+ if (Mask[i] == -1)
+ continue;
+ assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
+
+ // Is the element of the shuffle operand chosen by this shuffle the same as
+ // the element chosen by the shuffle operand itself?
+ if (Mask0[Mask[i]] != Mask0[i])
+ return SDValue();
+ }
+ // Every element of this shuffle is identical to the result of the previous
+ // shuffle, so we can replace this value.
+ return Shuf->getOperand(0);
+}
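The redundancy condition in simplifyShuffleOfShuffle is easy to check on index arrays: lane i of the outer shuffle selects inner-shuffle lane Mask[i], which holds source element Mask0[Mask[i]]. A standalone sketch of the test (plain C++, illustrative names):

#include <cassert>

bool outerShuffleIsRedundant(const int *Mask, const int *Mask0, int NumElts) {
  for (int i = 0; i != NumElts; ++i) {
    if (Mask[i] == -1)
      continue; // undef lanes impose no constraint
    if (Mask0[Mask[i]] != Mask0[i])
      return false; // outer shuffle picks a different source element
  }
  return true;
}

int main() {
  int Mask0[] = {2, 2, 3, 3};      // inner shuffle duplicates lanes 2 and 3
  int SwapPairs[] = {1, 0, 3, 2};  // outer shuffle swaps equal lanes
  int CrossPairs[] = {2, 3, 0, 1}; // outer shuffle mixes unequal lanes
  assert(outerShuffleIsRedundant(SwapPairs, Mask0, 4));
  assert(!outerShuffleIsRedundant(CrossPairs, Mask0, 4));
  return 0;
}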
+
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -17604,19 +18870,35 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
return InsElt;
- // A shuffle of a single vector that is a splat can always be folded.
- if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
- if (N1->isUndef() && N0Shuf->isSplat())
- return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
+ // A shuffle of a single vector that is a splatted value can always be folded.
+ if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
+ return V;
// If it is a splat, check if the argument vector is another splat or a
// build_vector.
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
- SDNode *V = N0.getNode();
+ int SplatIndex = SVN->getSplatIndex();
+ if (TLI.isExtractVecEltCheap(VT, SplatIndex) &&
+ TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
+ // splat (vector_bo L, R), Index -->
+ // splat (scalar_bo (extelt L, Index), (extelt R, Index))
+ SDValue L = N0.getOperand(0), R = N0.getOperand(1);
+ SDLoc DL(N);
+ EVT EltVT = VT.getScalarType();
+ SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL);
+ SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
+ SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
+ SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
+ N0.getNode()->getFlags());
+ SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
+ SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
+ return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
+ }
// If this is a bit convert that changes the element type of the vector but
// not the number of vector elements, look through it. Be careful not to
    // look through conversions that change things like v4f32 to v2f64.
+ SDNode *V = N0.getNode();
if (V->getOpcode() == ISD::BITCAST) {
SDValue ConvInput = V->getOperand(0);
if (ConvInput.getValueType().isVector() &&
@@ -17649,7 +18931,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return N0;
// Canonicalize any other splat as a build_vector.
- const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
+ SDValue Splatted = V->getOperand(SplatIndex);
SmallVector<SDValue, 8> Ops(NumElts, Splatted);
SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
@@ -17665,6 +18947,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
return SDValue(N, 0);
+ // This is intentionally placed after demanded elements simplification because
+ // it could eliminate knowledge of undef elements created by this shuffle.
+ if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
+ return ShufOp;
+
// Match shuffles that can be converted to any_vector_extend_in_reg.
if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
return V;
@@ -17704,7 +18991,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
NewMask.push_back(M < 0 ? -1 : Scale * M + s);
return NewMask;
};
-
+
SDValue BC0 = peekThroughOneUseBitcasts(N0);
if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
EVT SVT = VT.getScalarType();
@@ -17884,6 +19171,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
}
+ if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
+ return V;
+
return SDValue();
}
@@ -18006,7 +19296,44 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
if (!isa<ConstantSDNode>(N2))
return SDValue();
- unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+ uint64_t InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+
+ // Push subvector bitcasts to the output, adjusting the index as we go.
+ // insert_subvector(bitcast(v), bitcast(s), c1)
+ // -> bitcast(insert_subvector(v, s, c2))
+ if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
+ N1.getOpcode() == ISD::BITCAST) {
+ SDValue N0Src = peekThroughBitcasts(N0);
+ SDValue N1Src = peekThroughBitcasts(N1);
+ EVT N0SrcSVT = N0Src.getValueType().getScalarType();
+ EVT N1SrcSVT = N1Src.getValueType().getScalarType();
+ if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
+ N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
+ EVT NewVT;
+ SDLoc DL(N);
+ SDValue NewIdx;
+ MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+ LLVMContext &Ctx = *DAG.getContext();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
+ unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
+ NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
+ NewIdx = DAG.getConstant(InsIdx * Scale, DL, IdxVT);
+ } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
+ unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
+ if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
+ NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
+ NewIdx = DAG.getConstant(InsIdx / Scale, DL, IdxVT);
+ }
+ }
+ if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
+ SDValue Res = DAG.getBitcast(NewVT, N0Src);
+ Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
+ return DAG.getBitcast(VT, Res);
+ }
+ }
+ }
// Canonicalize insert_subvector dag nodes.
// Example:
@@ -18070,6 +19397,36 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N0.getValueType();
+ unsigned Opcode = N->getOpcode();
+
+ // VECREDUCE over 1-element vector is just an extract.
+ if (VT.getVectorNumElements() == 1) {
+ SDLoc dl(N);
+ SDValue Res = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ if (Res.getValueType() != N->getValueType(0))
+ Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
+ return Res;
+ }
+
+  // On a boolean vector, an and/or reduction is the same as a umin/umax
+ // reduction. Convert them if the latter is legal while the former isn't.
+ if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
+ unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
+ ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
+ if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
+ TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
+ DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
+ return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
+ }
+
+ return SDValue();
+}
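The and/or-to-umin/umax rewrite is only sound because ComputeNumSignBits(N0) == VT.getScalarSizeInBits() forces every lane to be 0 or all-ones. A standalone demonstration of the equivalence under that precondition (plain C++; i8 lanes are an illustrative choice):

#include <cassert>
#include <cstdint>

int main() {
  uint8_t Lanes[] = {0xFF, 0x00, 0xFF, 0xFF}; // boolean vector: 0 or all-ones
  uint8_t And = 0xFF, Or = 0x00, Min = 0xFF, Max = 0x00;
  for (uint8_t L : Lanes) {
    And &= L;              // VECREDUCE_AND
    Or |= L;               // VECREDUCE_OR
    Min = L < Min ? L : Min; // VECREDUCE_UMIN
    Max = L > Max ? L : Max; // VECREDUCE_UMAX
  }
  // With only 0x00/0xFF lanes, AND agrees with UMIN and OR with UMAX.
  assert(And == Min && Or == Max);
  return 0;
}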
+
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
@@ -18161,6 +19518,53 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
return SDValue();
}
+/// If a vector binop is performed on splat values, it may be profitable to
+/// extract, scalarize, and insert/splat.
+static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ unsigned Opcode = N->getOpcode();
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // TODO: Remove/replace the extract cost check? If the elements are available
+ // as scalars, then there may be no extract cost. Should we ask if
+ // inserting a scalar back into a vector is cheap instead?
+ int Index0, Index1;
+ SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
+ SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
+ if (!Src0 || !Src1 || Index0 != Index1 ||
+ Src0.getValueType().getVectorElementType() != EltVT ||
+ Src1.getValueType().getVectorElementType() != EltVT ||
+ !TLI.isExtractVecEltCheap(VT, Index0) ||
+ !TLI.isOperationLegalOrCustom(Opcode, EltVT))
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue IndexC =
+ DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
+ SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC);
+ SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC);
+ SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
+
+ // If all lanes but 1 are undefined, no need to splat the scalar result.
+ // TODO: Keep track of undefs and use that info in the general case.
+ if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
+ count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
+ count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
+ // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
+ // build_vec ..undef, (bo X, Y), undef...
+ SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
+ Ops[Index0] = ScalarBO;
+ return DAG.getBuildVector(VT, DL, Ops);
+ }
+
+ // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
+ SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
+ return DAG.getBuildVector(VT, DL, Ops);
+}
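The core identity behind scalarizeBinOpOfSplats can be verified with arrays: a lane-wise op on two splats is itself a splat of the scalar result. A minimal sketch (plain C++, not SelectionDAG; multiplication is an arbitrary choice of binop):

#include <array>
#include <cassert>

int main() {
  int X = 6, Y = 7;
  std::array<int, 4> SplatX, SplatY;
  SplatX.fill(X);
  SplatY.fill(Y);

  // Wide form: multiply every lane.
  std::array<int, 4> VectorMul;
  for (int i = 0; i != 4; ++i)
    VectorMul[i] = SplatX[i] * SplatY[i];

  // Narrow form: one scalar multiply, then re-splat.
  std::array<int, 4> SplatOfScalar;
  SplatOfScalar.fill(X * Y);

  assert(VectorMul == SplatOfScalar);
  return 0;
}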
+
/// Visit a binary vector operation, like ADD.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
assert(N->getValueType(0).isVector() &&
@@ -18169,34 +19573,63 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Ops[] = {LHS, RHS};
+ EVT VT = N->getValueType(0);
+ unsigned Opcode = N->getOpcode();
// See if we can constant fold the vector operation.
if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
- N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
+ Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
return Fold;
- // Type legalization might introduce new shuffles in the DAG.
- // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
- // -> (shuffle (VBinOp (A, B)), Undef, Mask).
- if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
- isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
- LHS.getOperand(1).isUndef() &&
- RHS.getOperand(1).isUndef()) {
- ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
- ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
-
- if (SVN0->getMask().equals(SVN1->getMask())) {
- EVT VT = N->getValueType(0);
- SDValue UndefVector = LHS.getOperand(1);
- SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
- LHS.getOperand(0), RHS.getOperand(0),
- N->getFlags());
- AddUsersToWorklist(N);
- return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
- SVN0->getMask());
+ // Move unary shuffles with identical masks after a vector binop:
+ // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
+ // --> shuffle (VBinOp A, B), Undef, Mask
+ // This does not require type legality checks because we are creating the
+ // same types of operations that are in the original sequence. We do have to
+  // restrict ops like integer div that have immediate UB (e.g., div-by-zero)
+ // though. This code is adapted from the identical transform in instcombine.
+ if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
+ Opcode != ISD::UREM && Opcode != ISD::SREM &&
+ Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
+ auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
+ auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
+ if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
+ LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
+ (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
+ SDLoc DL(N);
+ SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
+ RHS.getOperand(0), N->getFlags());
+ SDValue UndefV = LHS.getOperand(1);
+ return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
+ }
+ }
+
+ // The following pattern is likely to emerge with vector reduction ops. Moving
+ // the binary operation ahead of insertion may allow using a narrower vector
+ // instruction that has better performance than the wide version of the op:
+ // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
+ if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
+ RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
+ LHS.getOperand(2) == RHS.getOperand(2) &&
+ (LHS.hasOneUse() || RHS.hasOneUse())) {
+ SDValue X = LHS.getOperand(1);
+ SDValue Y = RHS.getOperand(1);
+ SDValue Z = LHS.getOperand(2);
+ EVT NarrowVT = X.getValueType();
+ if (NarrowVT == Y.getValueType() &&
+ TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
+ // (binop undef, undef) may not return undef, so compute that result.
+ SDLoc DL(N);
+ SDValue VecC =
+ DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
+ SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
}
}
+ if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
+ return V;
+
return SDValue();
}
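The first rewrite in SimplifyVBinOp relies on shuffles commuting with lane-wise ops when both operands use one shared mask. It also hints at why division and remainder are excluded: the hoisted op would compute lanes the original shuffles never selected, which could introduce a new divide-by-zero. A standalone check of the identity (plain C++; mask and values are illustrative):

#include <array>
#include <cassert>

int main() {
  std::array<int, 4> A{1, 2, 3, 4}, B{10, 20, 30, 40};
  std::array<int, 4> Mask{3, 1, 2, 0}; // one mask shared by both shuffles

  // Original form: add (shuffle A, Mask), (shuffle B, Mask).
  std::array<int, 4> ShufThenAdd;
  for (int i = 0; i != 4; ++i)
    ShufThenAdd[i] = A[Mask[i]] + B[Mask[i]];

  // Rewritten form: shuffle (add A, B), Mask.
  std::array<int, 4> Sum, AddThenShuf;
  for (int i = 0; i != 4; ++i)
    Sum[i] = A[i] + B[i];
  for (int i = 0; i != 4; ++i)
    AddThenShuf[i] = Sum[Mask[i]];

  assert(ShufThenAdd == AddThenShuf);
  return 0;
}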
@@ -18214,13 +19647,16 @@ SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
// Check to see if we got a select_cc back (to turn into setcc/select).
// Otherwise, just return whatever node we got back, like fabs.
if (SCC.getOpcode() == ISD::SELECT_CC) {
+ const SDNodeFlags Flags = N0.getNode()->getFlags();
SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
N0.getValueType(),
SCC.getOperand(0), SCC.getOperand(1),
- SCC.getOperand(4));
+ SCC.getOperand(4), Flags);
AddToWorklist(SETCC.getNode());
- return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
- SCC.getOperand(2), SCC.getOperand(3));
+ SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
+ SCC.getOperand(2), SCC.getOperand(3));
+ SelectNode->setFlags(Flags);
+ return SelectNode;
}
return SCC;
@@ -18305,6 +19741,10 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
// locations are not in the default address space.
LLD->getPointerInfo().getAddrSpace() != 0 ||
RLD->getPointerInfo().getAddrSpace() != 0 ||
+        // We can't produce a CMOV of a TargetFrameIndex since we won't
+        // generate the required address computation.
+ LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
+ RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
!TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
LLD->getBasePtr().getValueType()))
return false;
@@ -18501,8 +19941,8 @@ SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
// If a constant can be materialized without loads, this does not make sense.
if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
- TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) ||
- TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0)))
+ TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
+ TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
return SDValue();
// If both constants have multiple uses, then we won't need to do an extra
@@ -18547,20 +19987,20 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
if (N2 == N3) return N2;
EVT CmpOpVT = N0.getValueType();
+ EVT CmpResVT = getSetCCResultType(CmpOpVT);
EVT VT = N2.getValueType();
auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
// Determine if the condition we're dealing with is constant.
- SDValue SCC = SimplifySetCC(getSetCCResultType(CmpOpVT), N0, N1, CC, DL,
- false);
- if (SCC.getNode()) AddToWorklist(SCC.getNode());
-
- if (auto *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
- // fold select_cc true, x, y -> x
- // fold select_cc false, x, y -> y
- return !SCCC->isNullValue() ? N2 : N3;
+ if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
+ AddToWorklist(SCC.getNode());
+ if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
+ // fold select_cc true, x, y -> x
+ // fold select_cc false, x, y -> y
+ return !(SCCC->isNullValue()) ? N2 : N3;
+ }
}
if (SDValue V =
@@ -18621,7 +20061,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue Temp, SCC;
// zext (setcc n0, n1)
if (LegalTypes) {
- SCC = DAG.getSetCC(DL, getSetCCResultType(CmpOpVT), N0, N1, CC);
+ SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
if (VT.bitsLT(SCC.getValueType()))
Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
else
@@ -18644,36 +20084,6 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
getShiftAmountTy(Temp.getValueType())));
}
- // Check to see if this is an integer abs.
- // select_cc setg[te] X, 0, X, -X ->
- // select_cc setgt X, -1, X, -X ->
- // select_cc setl[te] X, 0, -X, X ->
- // select_cc setlt X, 1, -X, X ->
- // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
- if (N1C) {
- ConstantSDNode *SubC = nullptr;
- if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
- (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
- N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
- SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
- else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
- (N1C->isOne() && CC == ISD::SETLT)) &&
- N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
- SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
-
- if (SubC && SubC->isNullValue() && CmpOpVT.isInteger()) {
- SDLoc DL(N0);
- SDValue Shift = DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
- DAG.getConstant(CmpOpVT.getSizeInBits() - 1,
- DL,
- getShiftAmountTy(CmpOpVT)));
- SDValue Add = DAG.getNode(ISD::ADD, DL, CmpOpVT, N0, Shift);
- AddToWorklist(Shift.getNode());
- AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::XOR, DL, CmpOpVT, Add, Shift);
- }
- }
-
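The integer-abs select_cc pattern removed above expands abs(X) as Y = X >> (bits - 1); (X + Y) ^ Y. A standalone check of that expansion for 32-bit integers (plain C++; it relies on arithmetic right shift of a negative int and inherits the usual abs(INT_MIN) overflow caveat):

#include <cassert>

int absViaShiftAddXor(int X) {
  int Y = X >> 31;    // arithmetic shift: 0 if X >= 0, -1 if X < 0
  return (X + Y) ^ Y; // identity for X >= 0; ~(X - 1) == -X otherwise
}

int main() {
  assert(absViaShiftAddXor(5) == 5);
  assert(absViaShiftAddXor(-5) == 5);
  assert(absViaShiftAddXor(0) == 0);
  return 0;
}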
// select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
// select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
// select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
@@ -18728,7 +20138,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
// when optimising for minimum size, we don't want to expand a div to a mul
// and a shift.
- if (DAG.getMachineFunction().getFunction().optForMinSize())
+ if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
SmallVector<SDNode *, 8> Built;
@@ -18769,7 +20179,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
// when optimising for minimum size, we don't want to expand a div to a mul
// and a shift.
- if (DAG.getMachineFunction().getFunction().optForMinSize())
+ if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
SmallVector<SDNode *, 8> Built;
@@ -18821,7 +20231,6 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
AddToWorklist(Est.getNode());
if (Iterations) {
- EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
@@ -18977,7 +20386,6 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
if (!Reciprocal) {
// The estimate is now completely wrong if the input was exactly 0.0 or
// possibly a denormal. Force the answer to 0.0 for those cases.
- EVT VT = Op.getValueType();
SDLoc DL(Op);
EVT CCVT = getSetCCResultType(VT);
ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
@@ -19020,79 +20428,95 @@ SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
}
/// Return true if there is any possibility that the two addresses overlap.
-bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
- // If they are the same then they must be aliases.
- if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
+bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
- // If they are both volatile then they cannot be reordered.
- if (Op0->isVolatile() && Op1->isVolatile()) return true;
+ struct MemUseCharacteristics {
+ bool IsVolatile;
+ SDValue BasePtr;
+ int64_t Offset;
+ Optional<int64_t> NumBytes;
+ MachineMemOperand *MMO;
+ };
- // If one operation reads from invariant memory, and the other may store, they
- // cannot alias. These should really be checking the equivalent of mayWrite,
- // but it only matters for memory nodes other than load /store.
- if (Op0->isInvariant() && Op1->writeMem())
- return false;
+ auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
+ if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
+ int64_t Offset = 0;
+ if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
+ Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
+ ? C->getSExtValue()
+ : (LSN->getAddressingMode() == ISD::PRE_DEC)
+ ? -1 * C->getSExtValue()
+ : 0;
+ return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/,
+ Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
+ LSN->getMemOperand()};
+ }
+ if (const auto *LN = cast<LifetimeSDNode>(N))
+ return {false /*isVolatile*/, LN->getOperand(1),
+ (LN->hasOffset()) ? LN->getOffset() : 0,
+ (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
+ : Optional<int64_t>(),
+ (MachineMemOperand *)nullptr};
+ // Default.
+ return {false /*isvolatile*/, SDValue(), (int64_t)0 /*offset*/,
+ Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
+ };
- if (Op1->isInvariant() && Op0->writeMem())
- return false;
+ MemUseCharacteristics MUC0 = getCharacteristics(Op0),
+ MUC1 = getCharacteristics(Op1);
- unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
- unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
-
- // Check for BaseIndexOffset matching.
- BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
- BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
- int64_t PtrDiff;
- if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
- if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
- return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
-
- // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
- // able to calculate their relative offset if at least one arises
- // from an alloca. However, these allocas cannot overlap and we
- // can infer there is no alias.
- if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
- if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
-        // If the bases are the same frame index but we couldn't find a
-        // constant offset (the indices are different), be conservative.
- if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
- !MFI.isFixedObjectIndex(B->getIndex())))
- return false;
- }
+ // If they are to the same address, then they must be aliases.
+ if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
+ MUC0.Offset == MUC1.Offset)
+ return true;
- bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
- bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
- bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
- bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
- bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
- bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
-
- // If of mismatched base types or checkable indices we can check
- // they do not alias.
- if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
- (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
- (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
+ // If they are both volatile then they cannot be reordered.
+ if (MUC0.IsVolatile && MUC1.IsVolatile)
+ return true;
+
+ if (MUC0.MMO && MUC1.MMO) {
+ if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
+ (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
return false;
}
+ // Try to prove that there is aliasing, or that there is no aliasing. Either
+ // way, we can return now. If nothing can be proved, proceed with more tests.
+ bool IsAlias;
+ if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
+ DAG, IsAlias))
+ return IsAlias;
+
+ // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
+  // either is not known.
+ if (!MUC0.MMO || !MUC1.MMO)
+ return true;
+
+ // If one operation reads from invariant memory, and the other may store, they
+ // cannot alias. These should really be checking the equivalent of mayWrite,
+  // but it only matters for memory nodes other than load/store.
+ if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
+ (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
+ return false;
+
// If we know required SrcValue1 and SrcValue2 have relatively large
// alignment compared to the size and offset of the access, we may be able
// to prove they do not alias. This check is conservative for now to catch
// cases created by splitting vector types.
- int64_t SrcValOffset0 = Op0->getSrcValueOffset();
- int64_t SrcValOffset1 = Op1->getSrcValueOffset();
- unsigned OrigAlignment0 = Op0->getOriginalAlignment();
- unsigned OrigAlignment1 = Op1->getOriginalAlignment();
+ int64_t SrcValOffset0 = MUC0.MMO->getOffset();
+ int64_t SrcValOffset1 = MUC1.MMO->getOffset();
+ unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment();
+ unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment();
if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
- NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
+ MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() &&
+ *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) {
int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
// There is no overlap between these relatively aligned accesses of
// similar size. Return no alias.
- if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
- (OffAlign1 + NumBytes1) <= OffAlign0)
+ if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 ||
+ (OffAlign1 + *MUC1.NumBytes) <= OffAlign0)
return false;
}
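The alignment reasoning above can be restated with plain modular arithmetic: two same-sized accesses whose shared base alignment exceeds the access size each occupy a fixed window inside every aligned block, so disjoint windows imply no overlap. A standalone sketch (illustrative name; non-negative offsets assumed):

#include <cassert>
#include <cstdint>

bool provablyNoAlias(int64_t Off0, int64_t Off1, int64_t NumBytes,
                     int64_t Align) {
  // Position of each access within an Align-sized block.
  int64_t OffAlign0 = Off0 % Align;
  int64_t OffAlign1 = Off1 % Align;
  // No alias if the two windows cannot touch in any block.
  return (OffAlign0 + NumBytes) <= OffAlign1 ||
         (OffAlign1 + NumBytes) <= OffAlign0;
}

int main() {
  // 8-byte accesses at offsets 0 and 8 within 16-byte-aligned objects
  // occupy disjoint halves of any 16-byte block: provably no overlap.
  assert(provablyNoAlias(0, 8, 8, 16));
  // 8-byte accesses at offsets 4 and 8 can overlap, so stay conservative.
  assert(!provablyNoAlias(4, 8, 8, 16));
  return 0;
}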
@@ -19105,17 +20529,16 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
UseAA = false;
#endif
- if (UseAA && AA &&
- Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
+ if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
- int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
- int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
- AliasResult AAResult =
- AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
- UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
- MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
- UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
+ int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset;
+ int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset;
+ AliasResult AAResult = AA->alias(
+ MemoryLocation(MUC0.MMO->getValue(), Overlap0,
+ UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
+ MemoryLocation(MUC1.MMO->getValue(), Overlap1,
+ UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
if (AAResult == NoAlias)
return false;
}
@@ -19132,18 +20555,64 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
// Get alias information for node.
- bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
+ const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile();
// Starting off.
Chains.push_back(OriginalChain);
unsigned Depth = 0;
+  // Attempt to improve the chain by a single step.
+ std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
+ switch (C.getOpcode()) {
+ case ISD::EntryToken:
+ // No need to mark EntryToken.
+ C = SDValue();
+ return true;
+ case ISD::LOAD:
+ case ISD::STORE: {
+ // Get alias information for C.
+ bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
+ !cast<LSBaseSDNode>(C.getNode())->isVolatile();
+ if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
+ // Look further up the chain.
+ C = C.getOperand(0);
+ return true;
+ }
+ // Alias, so stop here.
+ return false;
+ }
+
+ case ISD::CopyFromReg:
+      // Always forward past CopyFromReg.
+ C = C.getOperand(0);
+ return true;
+
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END: {
+ // We can forward past any lifetime start/end that can be proven not to
+ // alias the memory access.
+ if (!isAlias(N, C.getNode())) {
+ // Look further up the chain.
+ C = C.getOperand(0);
+ return true;
+ }
+ return false;
+ }
+ default:
+ return false;
+ }
+ };
+
// Look at each chain and determine if it is an alias. If so, add it to the
// aliases list. If not, then continue up the chain looking for the next
// candidate.
while (!Chains.empty()) {
SDValue Chain = Chains.pop_back_val();
+ // Don't bother if we've seen Chain before.
+ if (!Visited.insert(Chain.getNode()).second)
+ continue;
+
// For TokenFactor nodes, look at each operand and only continue up the
// chain until we reach the depth limit.
//
@@ -19156,58 +20625,30 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
return;
}
-    // Don't bother if we've been here before.
- if (!Visited.insert(Chain.getNode()).second)
- continue;
-
- switch (Chain.getOpcode()) {
- case ISD::EntryToken:
- // Entry token is ideal chain operand, but handled in FindBetterChain.
- break;
-
- case ISD::LOAD:
- case ISD::STORE: {
- // Get alias information for Chain.
- bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
- !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
-
- // If chain is alias then stop here.
- if (!(IsLoad && IsOpLoad) &&
- isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
- Aliases.push_back(Chain);
- } else {
- // Look further up the chain.
- Chains.push_back(Chain.getOperand(0));
- ++Depth;
- }
- break;
- }
-
- case ISD::TokenFactor:
+ if (Chain.getOpcode() == ISD::TokenFactor) {
// We have to check each of the operands of the token factor for "small"
// token factors, so we queue them up. Adding the operands to the queue
// (stack) in reverse order maintains the original order and increases the
// likelihood that getNode will find a matching token factor (CSE.)
if (Chain.getNumOperands() > 16) {
Aliases.push_back(Chain);
- break;
+ continue;
}
for (unsigned n = Chain.getNumOperands(); n;)
Chains.push_back(Chain.getOperand(--n));
++Depth;
- break;
-
- case ISD::CopyFromReg:
- // Forward past CopyFromReg.
- Chains.push_back(Chain.getOperand(0));
+ continue;
+ }
+    // Everything else: attempt to improve the chain by a single step.
+    if (ImproveChain(Chain)) {
+      // An improved chain was found; consider the new chain if one exists.
+ if (Chain.getNode())
+ Chains.push_back(Chain);
++Depth;
- break;
-
- default:
- // For all other instructions we will just have to take what we can get.
- Aliases.push_back(Chain);
- break;
+ continue;
}
+    // No improved chain is possible, so treat this chain as an alias.
+ Aliases.push_back(Chain);
}
}
@@ -19232,13 +20673,15 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return Aliases[0];
// Construct a custom tailored token factor.
- return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
+ return DAG.getTokenFactor(SDLoc(N), Aliases);
}
+namespace {
// TODO: Replace with std::monostate when we move to C++17.
struct UnitT { } Unit;
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }
+} // namespace
// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
@@ -19349,7 +20792,7 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
if (AddNewChain)
TFOps.insert(TFOps.begin(), NewChain);
- SDValue TF = DAG.getNode(ISD::TokenFactor, SDLoc(STChain), MVT::Other, TFOps);
+ SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
CombineTo(St, TF);
AddToWorklist(STChain);