summaryrefslogtreecommitdiff
path: root/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp2429
1 files changed, 1603 insertions, 826 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2c7bffe76503f..4d468551ae24e 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -53,10 +53,6 @@ STATISTIC(SlicedLoads, "Number of load sliced");
namespace {
static cl::opt<bool>
- CombinerAA("combiner-alias-analysis", cl::Hidden,
- cl::desc("Enable DAG combiner alias-analysis heuristics"));
-
- static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
cl::desc("Enable DAG combiner's use of IR alias analysis"));
@@ -133,6 +129,9 @@ namespace {
/// Add to the worklist making sure its instance is at the back (next to be
/// processed.)
void AddToWorklist(SDNode *N) {
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ "Deleted Node added to Worklist");
+
// Skip handle nodes as they can't usefully be combined and confuse the
// zero-use deletion strategy.
if (N->getOpcode() == ISD::HANDLENODE)
@@ -177,6 +176,7 @@ namespace {
void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
private:
+ unsigned MaximumLegalStoreInBits;
/// Check the specified integer node value to see if it can be simplified or
/// if things it uses can be simplified by bit propagation.
@@ -232,9 +232,12 @@ namespace {
SDValue visitTokenFactor(SDNode *N);
SDValue visitMERGE_VALUES(SDNode *N);
SDValue visitADD(SDNode *N);
+ SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitSUB(SDNode *N);
SDValue visitADDC(SDNode *N);
+ SDValue visitUADDO(SDNode *N);
SDValue visitSUBC(SDNode *N);
+ SDValue visitUSUBO(SDNode *N);
SDValue visitADDE(SDNode *N);
SDValue visitSUBE(SDNode *N);
SDValue visitMUL(SDNode *N);
@@ -259,6 +262,7 @@ namespace {
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
SDValue visitRotate(SDNode *N);
+ SDValue visitABS(SDNode *N);
SDValue visitBSWAP(SDNode *N);
SDValue visitBITREVERSE(SDNode *N);
SDValue visitCTLZ(SDNode *N);
@@ -274,6 +278,7 @@ namespace {
SDValue visitSIGN_EXTEND(SDNode *N);
SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
+ SDValue visitAssertZext(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
@@ -336,6 +341,7 @@ namespace {
SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
SDValue foldSelectOfConstants(SDNode *N);
+ SDValue foldBinOpIntoSelect(SDNode *BO);
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
@@ -344,6 +350,8 @@ namespace {
bool NotExtCompare = false);
SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC);
+ SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
+ const SDLoc &DL);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
const SDLoc &DL, bool foldBooleans = true);
@@ -377,6 +385,7 @@ namespace {
unsigned PosOpcode, unsigned NegOpcode,
const SDLoc &DL);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
+ SDValue MatchLoadCombine(SDNode *N);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue splitMergedValStore(StoreSDNode *ST);
@@ -384,9 +393,9 @@ namespace {
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
SDValue reduceBuildVecToShuffle(SDNode *N);
- SDValue createBuildVecShuffle(SDLoc DL, SDNode *N, ArrayRef<int> VectorMask,
- SDValue VecIn1, SDValue VecIn2,
- unsigned LeftIdx);
+ SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
+ ArrayRef<int> VectorMask, SDValue VecIn1,
+ SDValue VecIn2, unsigned LeftIdx);
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
@@ -416,15 +425,12 @@ namespace {
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
- MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
- MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
+ MemOpLink(LSBaseSDNode *N, int64_t Offset)
+ : MemNode(N), OffsetFromBase(Offset) {}
// Ptr to the mem node.
LSBaseSDNode *MemNode;
// Offset from the base ptr.
int64_t OffsetFromBase;
- // What is the sequence number of this mem node.
- // Lowest mem operand in the DAG starts at zero.
- unsigned SequenceNum;
};
/// This is a helper function for visitMUL to check the profitability
@@ -435,12 +441,6 @@ namespace {
SDValue &AddNode,
SDValue &ConstNode);
- /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
- /// constant build_vector of the stored constant values in Stores.
- SDValue getMergedConstantVectorStore(SelectionDAG &DAG, const SDLoc &SL,
- ArrayRef<MemOpLink> Stores,
- SmallVectorImpl<SDValue> &Chains,
- EVT Ty) const;
/// This is a helper function for visitAND and visitZERO_EXTEND. Returns
/// true if the (and (load x) c) pattern matches an extload. ExtVT returns
@@ -451,34 +451,35 @@ namespace {
EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
bool &NarrowLoad);
+ /// Helper function for MergeConsecutiveStores which merges the
+ /// component store chains.
+ SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumStores);
+
/// This is a helper function for MergeConsecutiveStores. When the source
/// elements of the consecutive stores are all constants or all extracted
/// vector elements, try to merge them into one larger store.
- /// \return number of stores that were merged into a merged store (always
- /// a prefix of \p StoreNode).
- bool MergeStoresOfConstantsOrVecElts(
- SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
- bool IsConstantSrc, bool UseVector);
+ /// \return True if a merged store was created.
+ bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
+ EVT MemVT, unsigned NumStores,
+ bool IsConstantSrc, bool UseVector);
/// This is a helper function for MergeConsecutiveStores.
/// Stores that may be merged are placed in StoreNodes.
- /// Loads that may alias with those stores are placed in AliasLoadNodes.
- void getStoreMergeAndAliasCandidates(
- StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
- SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
+ void getStoreMergeCandidates(StoreSDNode *St,
+ SmallVectorImpl<MemOpLink> &StoreNodes);
/// Helper function for MergeConsecutiveStores. Checks if
/// Candidate stores have indirect dependency through their
/// operands. \return True if safe to merge
bool checkMergeStoreCandidatesForDependencies(
- SmallVectorImpl<MemOpLink> &StoreNodes);
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return number of stores that were merged into a merged store (the
/// affected nodes are stored as a prefix in \p StoreNodes).
- bool MergeConsecutiveStores(StoreSDNode *N,
- SmallVectorImpl<MemOpLink> &StoreNodes);
+ bool MergeConsecutiveStores(StoreSDNode *N);
/// \brief Try to transform a truncation where C is a constant:
/// (trunc (and X, C)) -> (and (trunc X), (trunc C))
@@ -493,6 +494,13 @@ namespace {
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
+
+ MaximumLegalStoreInBits = 0;
+ for (MVT VT : MVT::all_valuetypes())
+ if (EVT(VT).isSimple() && VT != MVT::Other &&
+ TLI.isTypeLegal(EVT(VT)) &&
+ VT.getSizeInBits() >= MaximumLegalStoreInBits)
+ MaximumLegalStoreInBits = VT.getSizeInBits();
}
/// Runs the dag combiner on all nodes in the work list
@@ -607,10 +615,16 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
switch (Op.getOpcode()) {
default: return false;
- case ISD::ConstantFP:
- // Don't invert constant FP values after legalize. The negated constant
- // isn't necessarily legal.
- return LegalOperations ? 0 : 1;
+ case ISD::ConstantFP: {
+ if (!LegalOperations)
+ return 1;
+
+ // Don't invert constant FP values after legalization unless the target says
+ // the negated constant is legal.
+ EVT VT = Op.getValueType();
+ return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
+ TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
+ }
case ISD::FADD:
// FIXME: determine better conditions for this xform.
if (!Options->UnsafeFPMath) return 0;
@@ -629,7 +643,8 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
Depth + 1);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!Options->UnsafeFPMath && !Op.getNode()->getFlags()->hasNoSignedZeros())
+ if (!Options->NoSignedZerosFPMath &&
+ !Op.getNode()->getFlags()->hasNoSignedZeros())
return 0;
// fold (fneg (fsub A, B)) -> (fsub B, A)
@@ -1079,37 +1094,36 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
+ DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+
bool Replace0 = false;
SDValue N0 = Op.getOperand(0);
SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
- if (!NN0.getNode())
- return SDValue();
bool Replace1 = false;
SDValue N1 = Op.getOperand(1);
- SDValue NN1;
- if (N0 == N1)
- NN1 = NN0;
- else {
- NN1 = PromoteOperand(N1, PVT, Replace1);
- if (!NN1.getNode())
- return SDValue();
- }
+ SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
+ SDLoc DL(Op);
- AddToWorklist(NN0.getNode());
- if (NN1.getNode())
- AddToWorklist(NN1.getNode());
+ SDValue RV =
+ DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
- if (Replace0)
+ // New replace instances of N0 and N1
+ if (Replace0 && N0 && N0.getOpcode() != ISD::DELETED_NODE && NN0 &&
+ NN0.getOpcode() != ISD::DELETED_NODE) {
+ AddToWorklist(NN0.getNode());
ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
- if (Replace1)
+ }
+
+ if (Replace1 && N1 && N1.getOpcode() != ISD::DELETED_NODE && NN1 &&
+ NN1.getOpcode() != ISD::DELETED_NODE) {
+ AddToWorklist(NN1.getNode());
ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
+ }
- DEBUG(dbgs() << "\nPromoting ";
- Op.getNode()->dump(&DAG));
- SDLoc DL(Op);
- return DAG.getNode(ISD::TRUNCATE, DL, VT,
- DAG.getNode(Opc, DL, PVT, NN0, NN1));
+ // Deal with Op being deleted.
+ if (Op && Op.getOpcode() != ISD::DELETED_NODE)
+ return RV;
}
return SDValue();
}
@@ -1137,26 +1151,32 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
+ DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+
bool Replace = false;
SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
if (Opc == ISD::SRA)
- N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
+ N0 = SExtPromoteOperand(N0, PVT);
else if (Opc == ISD::SRL)
- N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
+ N0 = ZExtPromoteOperand(N0, PVT);
else
N0 = PromoteOperand(N0, PVT, Replace);
+
if (!N0.getNode())
return SDValue();
+ SDLoc DL(Op);
+ SDValue RV =
+ DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
+
AddToWorklist(N0.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
- DEBUG(dbgs() << "\nPromoting ";
- Op.getNode()->dump(&DAG));
- SDLoc DL(Op);
- return DAG.getNode(ISD::TRUNCATE, DL, VT,
- DAG.getNode(Opc, DL, PVT, N0, Op.getOperand(1)));
+ // Deal with Op being deleted.
+ if (Op && Op.getOpcode() != ISD::DELETED_NODE)
+ return RV;
}
return SDValue();
}
@@ -1361,8 +1381,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
else {
assert(N->getValueType(0) == RV.getValueType() &&
N->getNumValues() == 1 && "Type mismatch");
- SDValue OpV = RV;
- DAG.ReplaceAllUsesWith(N, &OpV);
+ DAG.ReplaceAllUsesWith(N, &RV);
}
// Push the new node and any users onto the worklist
@@ -1389,7 +1408,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ADD: return visitADD(N);
case ISD::SUB: return visitSUB(N);
case ISD::ADDC: return visitADDC(N);
+ case ISD::UADDO: return visitUADDO(N);
case ISD::SUBC: return visitSUBC(N);
+ case ISD::USUBO: return visitUSUBO(N);
case ISD::ADDE: return visitADDE(N);
case ISD::SUBE: return visitSUBE(N);
case ISD::MUL: return visitMUL(N);
@@ -1415,6 +1436,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SRL: return visitSRL(N);
case ISD::ROTR:
case ISD::ROTL: return visitRotate(N);
+ case ISD::ABS: return visitABS(N);
case ISD::BSWAP: return visitBSWAP(N);
case ISD::BITREVERSE: return visitBITREVERSE(N);
case ISD::CTLZ: return visitCTLZ(N);
@@ -1430,6 +1452,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
+ case ISD::AssertZext: return visitAssertZext(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
@@ -1574,7 +1597,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
}
SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
- SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
+ SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
SmallPtrSet<SDNode*, 16> SeenOps;
bool Changed = false; // If we should replace this token factor.
@@ -1618,6 +1641,86 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
}
}
+ // Remove Nodes that are chained to another node in the list. Do so
+ // by walking up chains breath-first stopping when we've seen
+ // another operand. In general we must climb to the EntryNode, but we can exit
+ // early if we find all remaining work is associated with just one operand as
+ // no further pruning is possible.
+
+ // List of nodes to search through and original Ops from which they originate.
+ SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
+ SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
+ SmallPtrSet<SDNode *, 16> SeenChains;
+ bool DidPruneOps = false;
+
+ unsigned NumLeftToConsider = 0;
+ for (const SDValue &Op : Ops) {
+ Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
+ OpWorkCount.push_back(1);
+ }
+
+ auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
+ // If this is an Op, we can remove the op from the list. Remark any
+ // search associated with it as from the current OpNumber.
+ if (SeenOps.count(Op) != 0) {
+ Changed = true;
+ DidPruneOps = true;
+ unsigned OrigOpNumber = 0;
+ while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
+ OrigOpNumber++;
+ assert((OrigOpNumber != Ops.size()) &&
+ "expected to find TokenFactor Operand");
+ // Re-mark worklist from OrigOpNumber to OpNumber
+ for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
+ if (Worklist[i].second == OrigOpNumber) {
+ Worklist[i].second = OpNumber;
+ }
+ }
+ OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
+ OpWorkCount[OrigOpNumber] = 0;
+ NumLeftToConsider--;
+ }
+ // Add if it's a new chain
+ if (SeenChains.insert(Op).second) {
+ OpWorkCount[OpNumber]++;
+ Worklist.push_back(std::make_pair(Op, OpNumber));
+ }
+ };
+
+ for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
+ // We need at least be consider at least 2 Ops to prune.
+ if (NumLeftToConsider <= 1)
+ break;
+ auto CurNode = Worklist[i].first;
+ auto CurOpNumber = Worklist[i].second;
+ assert((OpWorkCount[CurOpNumber] > 0) &&
+ "Node should not appear in worklist");
+ switch (CurNode->getOpcode()) {
+ case ISD::EntryToken:
+ // Hitting EntryToken is the only way for the search to terminate without
+ // hitting
+ // another operand's search. Prevent us from marking this operand
+ // considered.
+ NumLeftToConsider++;
+ break;
+ case ISD::TokenFactor:
+ for (const SDValue &Op : CurNode->op_values())
+ AddToWorklist(i, Op.getNode(), CurOpNumber);
+ break;
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
+ break;
+ default:
+ if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
+ AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
+ break;
+ }
+ OpWorkCount[CurOpNumber]--;
+ if (OpWorkCount[CurOpNumber] == 0)
+ NumLeftToConsider--;
+ }
+
SDValue Result;
// If we've changed things around then replace token factor.
@@ -1626,15 +1729,22 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
// The entry token is the only possible outcome.
Result = DAG.getEntryNode();
} else {
- // New and improved token factor.
- Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
+ if (DidPruneOps) {
+ SmallVector<SDValue, 8> PrunedOps;
+ //
+ for (const SDValue &Op : Ops) {
+ if (SeenChains.count(Op.getNode()) == 0)
+ PrunedOps.push_back(Op);
+ }
+ Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
+ } else {
+ Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
+ }
}
- // Add users to worklist if AA is enabled, since it may introduce
- // a lot of new chained token factors while removing memory deps.
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
- return CombineTo(N, Result, UseAA /*add to worklist*/);
+ // Add users to worklist, since we may introduce a lot of new
+ // chained token factors while removing memory deps.
+ return CombineTo(N, Result, true /*add to worklist*/);
}
return Result;
@@ -1664,6 +1774,60 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
}
+SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
+ auto BinOpcode = BO->getOpcode();
+ assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
+ BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
+ BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
+ BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
+ BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
+ BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
+ BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
+ BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
+ BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
+ "Unexpected binary operator");
+
+ // Bail out if any constants are opaque because we can't constant fold those.
+ SDValue C1 = BO->getOperand(1);
+ if (!isConstantOrConstantVector(C1, true) &&
+ !isConstantFPBuildVectorOrConstantFP(C1))
+ return SDValue();
+
+ // Don't do this unless the old select is going away. We want to eliminate the
+ // binary operator, not replace a binop with a select.
+ // TODO: Handle ISD::SELECT_CC.
+ SDValue Sel = BO->getOperand(0);
+ if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
+ return SDValue();
+
+ SDValue CT = Sel.getOperand(1);
+ if (!isConstantOrConstantVector(CT, true) &&
+ !isConstantFPBuildVectorOrConstantFP(CT))
+ return SDValue();
+
+ SDValue CF = Sel.getOperand(2);
+ if (!isConstantOrConstantVector(CF, true) &&
+ !isConstantFPBuildVectorOrConstantFP(CF))
+ return SDValue();
+
+ // We have a select-of-constants followed by a binary operator with a
+ // constant. Eliminate the binop by pulling the constant math into the select.
+ // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
+ EVT VT = Sel.getValueType();
+ SDLoc DL(Sel);
+ SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
+ assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) ||
+ isConstantFPBuildVectorOrConstantFP(NewCT)) &&
+ "Failed to constant fold a binop with constant operands");
+
+ SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
+ assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) ||
+ isConstantFPBuildVectorOrConstantFP(NewCF)) &&
+ "Failed to constant fold a binop with constant operands");
+
+ return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
+}
+
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -1712,6 +1876,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
}
}
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// reassociate add
if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
return RADD;
@@ -1774,6 +1941,19 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1))
return DAG.getNode(ISD::OR, DL, VT, N0, N1);
+ if (SDValue Combined = visitADDLike(N0, N1, N))
+ return Combined;
+
+ if (SDValue Combined = visitADDLike(N1, N0, N))
+ return Combined;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
+ EVT VT = N0.getValueType();
+ SDLoc DL(LocReference);
+
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
@@ -1781,12 +1961,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
DAG.getNode(ISD::SHL, DL, VT,
N1.getOperand(0).getOperand(1),
N1.getOperand(1)));
- if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N0.getOperand(0).getOperand(0)))
- return DAG.getNode(ISD::SUB, DL, VT, N1,
- DAG.getNode(ISD::SHL, DL, VT,
- N0.getOperand(0).getOperand(1),
- N0.getOperand(1)));
if (N1.getOpcode() == ISD::AND) {
SDValue AndOp0 = N1.getOperand(0);
@@ -1797,7 +1971,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
// and similar xforms where the inner op is either ~0 or 0.
if (NumSignBits == DestBits &&
isOneConstantOrOneSplatConstant(N1->getOperand(1)))
- return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
+ return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
}
// add (sext i1), X -> sub X, (zext i1)
@@ -1825,39 +1999,61 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ SDLoc DL(N);
// If the flag result is dead, turn this into an ADD.
if (!N->hasAnyUseOfValue(1))
- return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
- DAG.getNode(ISD::CARRY_FALSE,
- SDLoc(N), MVT::Glue));
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// canonicalize constant to RHS.
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
- return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
+ return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
// fold (addc x, 0) -> x + no carry out
if (isNullConstant(N1))
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
- SDLoc(N), MVT::Glue));
+ DL, MVT::Glue));
- // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
- APInt LHSZero, LHSOne;
- APInt RHSZero, RHSOne;
- DAG.computeKnownBits(N0, LHSZero, LHSOne);
+ // If it cannot overflow, transform into an add.
+ if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
- if (LHSZero.getBoolValue()) {
- DAG.computeKnownBits(N1, RHSZero, RHSOne);
+ return SDValue();
+}
- // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
- // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
- if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
- return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
- DAG.getNode(ISD::CARRY_FALSE,
- SDLoc(N), MVT::Glue));
- }
+SDValue DAGCombiner::visitUADDO(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ if (VT.isVector())
+ return SDValue();
+
+ EVT CarryVT = N->getValueType(1);
+ SDLoc DL(N);
+
+ // If the flag result is dead, turn this into an ADD.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getUNDEF(CarryVT));
+
+ // canonicalize constant to RHS.
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
+
+ // fold (uaddo x, 0) -> x + no carry out
+ if (isNullConstant(N1))
+ return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
+
+ // If it cannot overflow, transform into an add.
+ if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getConstant(0, DL, CarryVT));
return SDValue();
}
@@ -1920,6 +2116,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
N1.getNode());
}
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
// fold (sub x, c) -> (add x, -c)
@@ -2066,6 +2265,38 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitUSUBO(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ if (VT.isVector())
+ return SDValue();
+
+ EVT CarryVT = N->getValueType(1);
+ SDLoc DL(N);
+
+ // If the flag result is dead, turn this into an SUB.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
+ DAG.getUNDEF(CarryVT));
+
+ // fold (usubo x, x) -> 0 + no borrow
+ if (N0 == N1)
+ return CombineTo(N, DAG.getConstant(0, DL, VT),
+ DAG.getConstant(0, DL, CarryVT));
+
+ // fold (usubo x, 0) -> x + no borrow
+ if (isNullConstant(N1))
+ return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
+
+ // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
+ if (isAllOnesConstant(N0))
+ return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
+ DAG.getConstant(0, DL, CarryVT));
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSUBE(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2131,6 +2362,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold (mul x, 1) -> x
if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (mul x, -1) -> 0-x
if (N1IsConst && ConstValue1.isAllOnesValue()) {
SDLoc DL(N);
@@ -2297,6 +2532,23 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {
return combined;
}
+static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ if (DAG.isUndef(N->getOpcode(), {N0, N1}))
+ return DAG.getUNDEF(VT);
+
+ // undef / X -> 0
+ // undef % X -> 0
+ if (N0.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2319,8 +2571,13 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
return N0;
// fold (sdiv X, -1) -> 0-X
if (N1C && N1C->isAllOnesValue())
- return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(0, DL, VT), N0);
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
+
+ if (SDValue V = simplifyDivRem(N, DAG))
+ return V;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
@@ -2372,7 +2629,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// If integer divide is expensive and we satisfy the requirements, emit an
// alternate sequence. Targets may check function attributes for size/speed
// trade-offs.
- AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildSDIV(N))
return Op;
@@ -2384,13 +2641,6 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
if (SDValue DivRem = useDivRem(N))
return DivRem;
- // undef / X -> 0
- if (N0.isUndef())
- return DAG.getConstant(0, DL, VT);
- // X / undef -> undef
- if (N1.isUndef())
- return N1;
-
return SDValue();
}
@@ -2414,6 +2664,12 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
N0C, N1C))
return Folded;
+ if (SDValue V = simplifyDivRem(N, DAG))
+ return V;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (udiv x, (1 << c)) -> x >>u c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1)) {
@@ -2444,7 +2700,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
}
// fold (udiv x, c) -> alternate
- AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildUDIV(N))
return Op;
@@ -2456,13 +2712,6 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue DivRem = useDivRem(N))
return DivRem;
- // undef / X -> 0
- if (N0.isUndef())
- return DAG.getConstant(0, DL, VT);
- // X / undef -> undef
- if (N1.isUndef())
- return N1;
-
return SDValue();
}
@@ -2482,32 +2731,35 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
return Folded;
+ if (SDValue V = simplifyDivRem(N, DAG))
+ return V;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
if (isSigned) {
// If we know the sign bits of both operands are zero, strength reduce to a
// urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
} else {
- // fold (urem x, pow2) -> (and x, pow2-1)
+ SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
if (DAG.isKnownToBeAPowerOfTwo(N1)) {
- APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits());
- SDValue Add =
- DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
+ // fold (urem x, pow2) -> (and x, pow2-1)
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
- // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
if (N1.getOpcode() == ISD::SHL &&
DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
- APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits());
- SDValue Add =
- DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
+ // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
}
- AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
// If X/C can be simplified by the division-by-constant logic, lower
// X%C to the equivalent of X-X/C*C.
@@ -2536,13 +2788,6 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (SDValue DivRem = useDivRem(N))
return DivRem.getValue(1);
- // undef % X -> 0
- if (N0.isUndef())
- return DAG.getConstant(0, DL, VT);
- // X % undef -> undef
- if (N1.isUndef())
- return N1;
-
return SDValue();
}
@@ -2932,95 +3177,139 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
return SDValue();
}
+/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
+SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
+ const SDLoc &DL) {
+ SDValue LL, LR, RL, RR, N0CC, N1CC;
+ if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
+ !isSetCCEquivalent(N1, RL, RR, N1CC))
+ return SDValue();
+
+ assert(N0.getValueType() == N1.getValueType() &&
+ "Unexpected operand types for bitwise logic op");
+ assert(LL.getValueType() == LR.getValueType() &&
+ RL.getValueType() == RR.getValueType() &&
+ "Unexpected operand types for setcc");
+
+ // If we're here post-legalization or the logic op type is not i1, the logic
+ // op type must match a setcc result type. Also, all folds require new
+ // operations on the left and right operands, so those types must match.
+ EVT VT = N0.getValueType();
+ EVT OpVT = LL.getValueType();
+ if (LegalOperations || VT != MVT::i1)
+ if (VT != getSetCCResultType(OpVT))
+ return SDValue();
+ if (OpVT != RL.getValueType())
+ return SDValue();
+
+ ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
+ ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
+ bool IsInteger = OpVT.isInteger();
+ if (LR == RR && CC0 == CC1 && IsInteger) {
+ bool IsZero = isNullConstantOrNullSplatConstant(LR);
+ bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
+
+ // All bits clear?
+ bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
+ // All sign bits clear?
+ bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
+ // Any bits set?
+ bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
+ // Any sign bits set?
+ bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
+
+ // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
+ // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
+ // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
+ // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
+ if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
+ SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
+ AddToWorklist(Or.getNode());
+ return DAG.getSetCC(DL, VT, Or, LR, CC1);
+ }
+
+ // All bits set?
+ bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
+ // All sign bits set?
+ bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
+ // Any bits clear?
+ bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
+ // Any sign bits clear?
+ bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
+
+ // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
+ // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
+ // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
+ // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
+ if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
+ AddToWorklist(And.getNode());
+ return DAG.getSetCC(DL, VT, And, LR, CC1);
+ }
+ }
+
+ // TODO: What is the 'or' equivalent of this fold?
+ // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
+ if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE &&
+ ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
+ (isAllOnesConstant(LR) && isNullConstant(RR)))) {
+ SDValue One = DAG.getConstant(1, DL, OpVT);
+ SDValue Two = DAG.getConstant(2, DL, OpVT);
+ SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
+ AddToWorklist(Add.getNode());
+ return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
+ }
+
+ // Try more general transforms if the predicates match and the only user of
+ // the compares is the 'and' or 'or'.
+ if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
+ N0.hasOneUse() && N1.hasOneUse()) {
+ // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
+ // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
+ if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
+ SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
+ SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
+ SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
+ SDValue Zero = DAG.getConstant(0, DL, OpVT);
+ return DAG.getSetCC(DL, VT, Or, Zero, CC1);
+ }
+ }
+
+ // Canonicalize equivalent operands to LL == RL.
+ if (LL == RR && LR == RL) {
+ CC1 = ISD::getSetCCSwappedOperands(CC1);
+ std::swap(RL, RR);
+ }
+
+ // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
+ // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
+ if (LL == RL && LR == RR) {
+ ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
+ : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
+ if (NewCC != ISD::SETCC_INVALID &&
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC, OpVT))))
+ return DAG.getSetCC(DL, VT, LL, LR, NewCC);
+ }
+
+ return SDValue();
+}
+
/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value. This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
/// visitSELECT() already handles those cases.
-SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
- SDNode *LocReference) {
+SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N1.getValueType();
+ SDLoc DL(N);
// fold (and x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
- return DAG.getConstant(0, SDLoc(LocReference), VT);
- // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
- SDValue LL, LR, RL, RR, CC0, CC1;
- if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
- ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
- ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
-
- if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
- LL.getValueType().isInteger()) {
- // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
- if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
- EVT CCVT = getSetCCResultType(LR.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
- }
- }
- if (isAllOnesConstant(LR)) {
- // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
- if (Op1 == ISD::SETEQ) {
- EVT CCVT = getSetCCResultType(LR.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
- }
- }
- // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
- if (Op1 == ISD::SETGT) {
- EVT CCVT = getSetCCResultType(LR.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
- }
- }
- }
- }
- // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
- if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
- Op0 == Op1 && LL.getValueType().isInteger() &&
- Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
- (isAllOnesConstant(LR) && isNullConstant(RR)))) {
- EVT CCVT = getSetCCResultType(LL.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDLoc DL(N0);
- SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
- LL, DAG.getConstant(1, DL,
- LL.getValueType()));
- AddToWorklist(ADDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
- DAG.getConstant(2, DL, LL.getValueType()),
- ISD::SETUGE);
- }
- }
- // canonicalize equivalent to ll == rl
- if (LL == RR && LR == RL) {
- Op1 = ISD::getSetCCSwappedOperands(Op1);
- std::swap(RL, RR);
- }
- if (LL == RL && LR == RR) {
- bool isInteger = LL.getValueType().isInteger();
- ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
- if (Result != ISD::SETCC_INVALID &&
- (!LegalOperations ||
- (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
- EVT CCVT = getSetCCResultType(LL.getValueType());
- if (N0.getValueType() == CCVT ||
- (!LegalOperations && N0.getValueType() == MVT::i1))
- return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
- LL, LR, Result);
- }
- }
- }
+ return DAG.getConstant(0, DL, VT);
+
+ if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
+ return V;
if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
VT.getSizeInBits() <= 64) {
@@ -3037,13 +3326,13 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
ADDC |= Mask;
if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
- SDLoc DL(N0);
+ SDLoc DL0(N0);
SDValue NewAdd =
- DAG.getNode(ISD::ADD, DL, VT,
+ DAG.getNode(ISD::ADD, DL0, VT,
N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
CombineTo(N0.getNode(), NewAdd);
// Return N so it doesn't get rechecked!
- return SDValue(LocReference, 0);
+ return SDValue(N, 0);
}
}
}
@@ -3068,7 +3357,7 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
unsigned MaskBits = AndMask.countTrailingOnes();
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
- if (APIntOps::isMask(AndMask) &&
+ if (AndMask.isMask() &&
// Required bits must not span the two halves of the integer and
// must fit in the half size type.
(ShiftBits + MaskBits <= Size / 2) &&
@@ -3108,7 +3397,7 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
bool &NarrowLoad) {
uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
- if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue()))
+ if (ActiveBits == 0 || !AndC->getAPIntValue().isMask(ActiveBits))
return false;
ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
@@ -3191,6 +3480,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(BitWidth)))
return DAG.getConstant(0, SDLoc(N), VT);
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// reassociate and
if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
return RAND;
@@ -3299,6 +3592,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
// preserve semantics once we get rid of the AND.
SDValue NewLoad(Load, 0);
+
+ // Fold the AND away. NewLoad may get replaced immediately.
+ CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
+
if (Load->getExtensionType() == ISD::EXTLOAD) {
NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
Load->getValueType(0), SDLoc(Load),
@@ -3316,10 +3613,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
- // Fold the AND away, taking care not to fold to the old load node if we
- // replaced it.
- CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
-
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -3723,65 +4016,16 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
/// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value \see visitANDLike().
-SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
+SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N1.getValueType();
+ SDLoc DL(N);
+
// fold (or x, undef) -> -1
- if (!LegalOperations &&
- (N0.isUndef() || N1.isUndef())) {
- EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
- return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
- SDLoc(LocReference), VT);
- }
- // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
- SDValue LL, LR, RL, RR, CC0, CC1;
- if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
- ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
- ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
-
- if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) {
- // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
- // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
- if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
- EVT CCVT = getSetCCResultType(LR.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
- }
- }
- // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
- // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
- if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
- EVT CCVT = getSetCCResultType(LR.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
- }
- }
- }
- // canonicalize equivalent to ll == rl
- if (LL == RR && LR == RL) {
- Op1 = ISD::getSetCCSwappedOperands(Op1);
- std::swap(RL, RR);
- }
- if (LL == RL && LR == RR) {
- bool isInteger = LL.getValueType().isInteger();
- ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
- if (Result != ISD::SETCC_INVALID &&
- (!LegalOperations ||
- (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
- EVT CCVT = getSetCCResultType(LL.getValueType());
- if (N0.getValueType() == CCVT ||
- (!LegalOperations && N0.getValueType() == MVT::i1))
- return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
- LL, LR, Result);
- }
- }
- }
+ if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
+ return DAG.getAllOnesConstant(DL, VT);
+
+ if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
+ return V;
// (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
@@ -3802,7 +4046,6 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
N0.getOperand(0), N1.getOperand(0));
- SDLoc DL(LocReference);
return DAG.getNode(ISD::AND, DL, VT, X,
DAG.getConstant(LHSMask | RHSMask, DL, VT));
}
@@ -3818,7 +4061,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
(N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
N0.getOperand(1), N1.getOperand(1));
- return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X);
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
}
return SDValue();
@@ -3847,14 +4090,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// fold (or x, -1) -> -1, vector edition
if (ISD::isBuildVectorAllOnes(N0.getNode()))
// do not return N0, because undef node may exist in N0
- return DAG.getConstant(
- APInt::getAllOnesValue(N0.getScalarValueSizeInBits()), SDLoc(N),
- N0.getValueType());
+ return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
if (ISD::isBuildVectorAllOnes(N1.getNode()))
// do not return N1, because undef node may exist in N1
- return DAG.getConstant(
- APInt::getAllOnesValue(N1.getScalarValueSizeInBits()), SDLoc(N),
- N1.getValueType());
+ return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
// Do this only if the resulting shuffle is legal.
@@ -3867,7 +4106,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
// Ensure both shuffles have a zero input.
- if ((ZeroN00 || ZeroN01) && (ZeroN10 || ZeroN11)) {
+ if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
@@ -3939,6 +4178,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// fold (or x, -1) -> -1
if (isAllOnesConstant(N1))
return N1;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (or x, c) -> c iff (x & ~c) == 0
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return N1;
@@ -3956,7 +4199,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
return ROR;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
- // iff (c1 & c2) == 0.
+ // iff (c1 & c2) != 0.
if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
isa<ConstantSDNode>(N0.getOperand(1))) {
ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
@@ -3978,6 +4221,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
return SDValue(Rot, 0);
+ if (SDValue Load = MatchLoadCombine(N))
+ return Load;
+
// Simplify the operands using demanded-bits information.
if (!VT.isVector() &&
SimplifyDemandedBits(SDValue(N, 0)))
@@ -4190,8 +4436,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
- APInt AllBits = APInt::getAllOnesValue(EltSizeInBits);
- SDValue Mask = DAG.getConstant(AllBits, DL, VT);
+ SDValue Mask = DAG.getAllOnesConstant(DL, VT);
if (LHSMask.getNode()) {
APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
@@ -4349,6 +4594,299 @@ struct BaseIndexOffset {
};
} // namespace
+namespace {
+/// Represents known origin of an individual byte in load combine pattern. The
+/// value of the byte is either constant zero or comes from memory.
+struct ByteProvider {
+ // For constant zero providers Load is set to nullptr. For memory providers
+ // Load represents the node which loads the byte from memory.
+ // ByteOffset is the offset of the byte in the value produced by the load.
+ LoadSDNode *Load;
+ unsigned ByteOffset;
+
+ ByteProvider() : Load(nullptr), ByteOffset(0) {}
+
+ static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
+ return ByteProvider(Load, ByteOffset);
+ }
+ static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
+
+ bool isConstantZero() const { return !Load; }
+ bool isMemory() const { return Load; }
+
+ bool operator==(const ByteProvider &Other) const {
+ return Other.Load == Load && Other.ByteOffset == ByteOffset;
+ }
+
+private:
+ ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
+ : Load(Load), ByteOffset(ByteOffset) {}
+};
+
+/// Recursively traverses the expression calculating the origin of the requested
+/// byte of the given value. Returns None if the provider can't be calculated.
+///
+/// For all the values except the root of the expression verifies that the value
+/// has exactly one use and if it's not true return None. This way if the origin
+/// of the byte is returned it's guaranteed that the values which contribute to
+/// the byte are not used outside of this expression.
+///
+/// Because the parts of the expression are not allowed to have more than one
+/// use this function iterates over trees, not DAGs. So it never visits the same
+/// node more than once.
+const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index,
+ unsigned Depth,
+ bool Root = false) {
+ // Typical i64 by i8 pattern requires recursion up to 8 calls depth
+ if (Depth == 10)
+ return None;
+
+ if (!Root && !Op.hasOneUse())
+ return None;
+
+ assert(Op.getValueType().isScalarInteger() && "can't handle other types");
+ unsigned BitWidth = Op.getValueSizeInBits();
+ if (BitWidth % 8 != 0)
+ return None;
+ unsigned ByteWidth = BitWidth / 8;
+ assert(Index < ByteWidth && "invalid index requested");
+ (void) ByteWidth;
+
+ switch (Op.getOpcode()) {
+ case ISD::OR: {
+ auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
+ if (!LHS)
+ return None;
+ auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
+ if (!RHS)
+ return None;
+
+ if (LHS->isConstantZero())
+ return RHS;
+ if (RHS->isConstantZero())
+ return LHS;
+ return None;
+ }
+ case ISD::SHL: {
+ auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+ if (!ShiftOp)
+ return None;
+
+ uint64_t BitShift = ShiftOp->getZExtValue();
+ if (BitShift % 8 != 0)
+ return None;
+ uint64_t ByteShift = BitShift / 8;
+
+ return Index < ByteShift
+ ? ByteProvider::getConstantZero()
+ : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
+ Depth + 1);
+ }
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND: {
+ SDValue NarrowOp = Op->getOperand(0);
+ unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
+ if (NarrowBitWidth % 8 != 0)
+ return None;
+ uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+
+ if (Index >= NarrowByteWidth)
+ return Op.getOpcode() == ISD::ZERO_EXTEND
+ ? Optional<ByteProvider>(ByteProvider::getConstantZero())
+ : None;
+ return calculateByteProvider(NarrowOp, Index, Depth + 1);
+ }
+ case ISD::BSWAP:
+ return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
+ Depth + 1);
+ case ISD::LOAD: {
+ auto L = cast<LoadSDNode>(Op.getNode());
+ if (L->isVolatile() || L->isIndexed())
+ return None;
+
+ unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
+ if (NarrowBitWidth % 8 != 0)
+ return None;
+ uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+
+ if (Index >= NarrowByteWidth)
+ return L->getExtensionType() == ISD::ZEXTLOAD
+ ? Optional<ByteProvider>(ByteProvider::getConstantZero())
+ : None;
+ return ByteProvider::getMemory(L, Index);
+ }
+ }
+
+ return None;
+}
+} // namespace
+
+/// Match a pattern where a wide type scalar value is loaded by several narrow
+/// loads and combined by shifts and ors. Fold it into a single load or a load
+/// and a BSWAP if the targets supports it.
+///
+/// Assuming little endian target:
+/// i8 *a = ...
+/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
+/// =>
+/// i32 val = *((i32)a)
+///
+/// i8 *a = ...
+/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
+/// =>
+/// i32 val = BSWAP(*((i32)a))
+///
+/// TODO: This rule matches complex patterns with OR node roots and doesn't
+/// interact well with the worklist mechanism. When a part of the pattern is
+/// updated (e.g. one of the loads) its direct users are put into the worklist,
+/// but the root node of the pattern which triggers the load combine is not
+/// necessarily a direct user of the changed node. For example, once the address
+/// of t28 load is reassociated load combine won't be triggered:
+/// t25: i32 = add t4, Constant:i32<2>
+/// t26: i64 = sign_extend t25
+/// t27: i64 = add t2, t26
+/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
+/// t29: i32 = zero_extend t28
+/// t32: i32 = shl t29, Constant:i8<8>
+/// t33: i32 = or t23, t32
+/// As a possible fix visitLoad can check if the load can be a part of a load
+/// combine pattern and add corresponding OR roots to the worklist.
+SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
+ assert(N->getOpcode() == ISD::OR &&
+ "Can only match load combining against OR nodes");
+
+ // Handles simple types only
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+ unsigned ByteWidth = VT.getSizeInBits() / 8;
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ // Before legalize we can introduce too wide illegal loads which will be later
+ // split into legal sized loads. This enables us to combine i64 load by i8
+ // patterns to a couple of i32 loads on 32 bit targets.
+ if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
+ return SDValue();
+
+ std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
+ unsigned BW, unsigned i) { return i; };
+ std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
+ unsigned BW, unsigned i) { return BW - i - 1; };
+
+ bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
+ auto MemoryByteOffset = [&] (ByteProvider P) {
+ assert(P.isMemory() && "Must be a memory byte provider");
+ unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
+ assert(LoadBitWidth % 8 == 0 &&
+ "can only analyze providers for individual bytes not bit");
+ unsigned LoadByteWidth = LoadBitWidth / 8;
+ return IsBigEndianTarget
+ ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
+ : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
+ };
+
+ Optional<BaseIndexOffset> Base;
+ SDValue Chain;
+
+ SmallSet<LoadSDNode *, 8> Loads;
+ Optional<ByteProvider> FirstByteProvider;
+ int64_t FirstOffset = INT64_MAX;
+
+ // Check if all the bytes of the OR we are looking at are loaded from the same
+ // base address. Collect bytes offsets from Base address in ByteOffsets.
+ SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
+ for (unsigned i = 0; i < ByteWidth; i++) {
+ auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
+ if (!P || !P->isMemory()) // All the bytes must be loaded from memory
+ return SDValue();
+
+ LoadSDNode *L = P->Load;
+ assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
+ "Must be enforced by calculateByteProvider");
+ assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
+
+ // All loads must share the same chain
+ SDValue LChain = L->getChain();
+ if (!Chain)
+ Chain = LChain;
+ else if (Chain != LChain)
+ return SDValue();
+
+ // Loads must share the same base address
+ BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
+ if (!Base)
+ Base = Ptr;
+ else if (!Base->equalBaseIndex(Ptr))
+ return SDValue();
+
+ // Calculate the offset of the current byte from the base address
+ int64_t ByteOffsetFromBase = Ptr.Offset + MemoryByteOffset(*P);
+ ByteOffsets[i] = ByteOffsetFromBase;
+
+ // Remember the first byte load
+ if (ByteOffsetFromBase < FirstOffset) {
+ FirstByteProvider = P;
+ FirstOffset = ByteOffsetFromBase;
+ }
+
+ Loads.insert(L);
+ }
+ assert(Loads.size() > 0 && "All the bytes of the value must be loaded from "
+ "memory, so there must be at least one load which produces the value");
+ assert(Base && "Base address of the accessed memory location must be set");
+ assert(FirstOffset != INT64_MAX && "First byte offset must be set");
+
+ // Check if the bytes of the OR we are looking at match with either big or
+ // little endian value load
+ bool BigEndian = true, LittleEndian = true;
+ for (unsigned i = 0; i < ByteWidth; i++) {
+ int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
+ LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
+ BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
+ if (!BigEndian && !LittleEndian)
+ return SDValue();
+ }
+ assert((BigEndian != LittleEndian) && "should be either or");
+ assert(FirstByteProvider && "must be set");
+
+ // Ensure that the first byte is loaded from zero offset of the first load.
+ // So the combined value can be loaded from the first load address.
+ if (MemoryByteOffset(*FirstByteProvider) != 0)
+ return SDValue();
+ LoadSDNode *FirstLoad = FirstByteProvider->Load;
+
+ // The node we are looking at matches with the pattern, check if we can
+ // replace it with a single load and bswap if needed.
+
+ // If the load needs byte swap check if the target supports it
+ bool NeedsBswap = IsBigEndianTarget != BigEndian;
+
+ // Before legalize we can introduce illegal bswaps which will be later
+ // converted to an explicit bswap sequence. This way we end up with a single
+ // load and byte shuffling instead of several loads and byte shuffling.
+ if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Check that a load of the wide type is both allowed and fast on the target
+ bool Fast = false;
+ bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ VT, FirstLoad->getAddressSpace(),
+ FirstLoad->getAlignment(), &Fast);
+ if (!Allowed || !Fast)
+ return SDValue();
+
+ SDValue NewLoad =
+ DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
+
+ // Transfer chain users from old loads to the new load.
+ for (LoadSDNode *L : Loads)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
+
+ return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
+}
+
SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4386,6 +4924,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// fold (xor x, 0) -> x
if (isNullConstant(N1))
return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// reassociate xor
if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
return RXOR;
@@ -4403,9 +4945,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
default:
llvm_unreachable("Unhandled SetCC Equivalent!");
case ISD::SETCC:
- return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
+ return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
case ISD::SELECT_CC:
- return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
+ return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
N0.getOperand(3), NotCC);
}
}
@@ -4470,6 +5012,17 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
N01C->getAPIntValue(), DL, VT));
}
}
+
+ // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
+ N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
+ TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+ if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
+ if (C->getAPIntValue() == (OpSizeInBits - 1))
+ return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
+ }
+
// fold (xor x, x) -> 0
if (N0 == N1)
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
@@ -4673,6 +5226,10 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl undef, x) -> 0
if (N0.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// if (shl x, c) is known to be zero, return 0
if (DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
@@ -4808,9 +5365,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
isConstantOrConstantVector(N1, /* No Opaques */ true)) {
- unsigned BitSize = VT.getScalarSizeInBits();
SDLoc DL(N);
- SDValue AllBits = DAG.getConstant(APInt::getAllOnesValue(BitSize), DL, VT);
+ SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
}
@@ -4877,6 +5433,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// fold (sra x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
// sext_inreg.
if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
@@ -5024,6 +5584,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// if (srl x, c) is known to be zero, return 0
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
@@ -5074,9 +5638,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
SDLoc DL(N);
- APInt AllBits = APInt::getAllOnesValue(N0.getScalarValueSizeInBits());
SDValue Mask =
- DAG.getNode(ISD::SRL, DL, VT, DAG.getConstant(AllBits, DL, VT), N1);
+ DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
AddToWorklist(Mask.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
}
@@ -5202,6 +5765,22 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitABS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (abs c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
+ // fold (abs (abs x)) -> (abs x)
+ if (N0.getOpcode() == ISD::ABS)
+ return N0;
+ // fold (abs x) -> x iff not-negative
+ if (DAG.SignBitIsZero(N0))
+ return N0;
+ return SDValue();
+}
+
SDValue DAGCombiner::visitBSWAP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -5217,7 +5796,11 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) {
SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ // fold (bitreverse c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
// fold (bitreverse (bitreverse x)) -> x
if (N0.getOpcode() == ISD::BITREVERSE)
return N0.getOperand(0);
@@ -5311,7 +5894,6 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
}
}
-// TODO: We should handle other cases of selecting between {-1,0,1} here.
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
SDValue Cond = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5320,6 +5902,67 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
EVT CondVT = Cond.getValueType();
SDLoc DL(N);
+ if (!VT.isInteger())
+ return SDValue();
+
+ auto *C1 = dyn_cast<ConstantSDNode>(N1);
+ auto *C2 = dyn_cast<ConstantSDNode>(N2);
+ if (!C1 || !C2)
+ return SDValue();
+
+ // Only do this before legalization to avoid conflicting with target-specific
+ // transforms in the other direction (create a select from a zext/sext). There
+ // is also a target-independent combine here in DAGCombiner in the other
+ // direction for (select Cond, -1, 0) when the condition is not i1.
+ if (CondVT == MVT::i1 && !LegalOperations) {
+ if (C1->isNullValue() && C2->isOne()) {
+ // select Cond, 0, 1 --> zext (!Cond)
+ SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
+ if (VT != MVT::i1)
+ NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
+ return NotCond;
+ }
+ if (C1->isNullValue() && C2->isAllOnesValue()) {
+ // select Cond, 0, -1 --> sext (!Cond)
+ SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
+ if (VT != MVT::i1)
+ NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
+ return NotCond;
+ }
+ if (C1->isOne() && C2->isNullValue()) {
+ // select Cond, 1, 0 --> zext (Cond)
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
+ return Cond;
+ }
+ if (C1->isAllOnesValue() && C2->isNullValue()) {
+ // select Cond, -1, 0 --> sext (Cond)
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
+ return Cond;
+ }
+
+ // For any constants that differ by 1, we can transform the select into an
+ // extend and add. Use a target hook because some targets may prefer to
+ // transform in the other direction.
+ if (TLI.convertSelectOfConstantsToMath()) {
+ if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
+ // select Cond, C1, C1-1 --> add (zext Cond), C1-1
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
+ return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
+ }
+ if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
+ // select Cond, C1, C1+1 --> add (sext Cond), C1+1
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
+ return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
+ }
+ }
+
+ return SDValue();
+ }
+
// fold (select Cond, 0, 1) -> (xor Cond, 1)
// We can't do this reliably if integer based booleans have different contents
// to floating point based booleans. This is because we can't tell whether we
@@ -5329,15 +5972,14 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
// undiscoverable (or not reasonably discoverable). For example, it could be
// in another basic block or it could require searching a complicated
// expression.
- if (VT.isInteger() &&
- (CondVT == MVT::i1 || (CondVT.isInteger() &&
- TLI.getBooleanContents(false, true) ==
- TargetLowering::ZeroOrOneBooleanContent &&
- TLI.getBooleanContents(false, false) ==
- TargetLowering::ZeroOrOneBooleanContent)) &&
- isNullConstant(N1) && isOneConstant(N2)) {
- SDValue NotCond = DAG.getNode(ISD::XOR, DL, CondVT, Cond,
- DAG.getConstant(1, DL, CondVT));
+ if (CondVT.isInteger() &&
+ TLI.getBooleanContents(false, true) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ TLI.getBooleanContents(false, false) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ C1->isNullValue() && C2->isOne()) {
+ SDValue NotCond =
+ DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
if (VT.bitsEq(CondVT))
return NotCond;
return DAG.getZExtOrTrunc(NotCond, DL, VT);
@@ -5847,7 +6489,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
ISD::NON_EXTLOAD, MLD->isExpandingLoad());
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
- MLD->isExpandingLoad());
+ MLD->isExpandingLoad());
MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
@@ -5921,34 +6563,6 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (SimplifySelectOps(N, N1, N2))
return SDValue(N, 0); // Don't revisit N.
- // If the VSELECT result requires splitting and the mask is provided by a
- // SETCC, then split both nodes and its operands before legalization. This
- // prevents the type legalizer from unrolling SETCC into scalar comparisons
- // and enables future optimizations (e.g. min/max pattern matching on X86).
- if (N0.getOpcode() == ISD::SETCC) {
- EVT VT = N->getValueType(0);
-
- // Check if any splitting is required.
- if (TLI.getTypeAction(*DAG.getContext(), VT) !=
- TargetLowering::TypeSplitVector)
- return SDValue();
-
- SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
- std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
- std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
- std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
-
- Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
- Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
-
- // Add the new VSELECT nodes to the work list in case they need to be split
- // again.
- AddToWorklist(Lo.getNode());
- AddToWorklist(Hi.getNode());
-
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
- }
-
// Fold (vselect (build_vector all_ones), N1, N2) -> N1
if (ISD::isBuildVectorAllOnes(N0.getNode()))
return N1;
@@ -6258,6 +6872,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
+ // Simplify TF.
+ AddToWorklist(NewChain.getNode());
+
CombineTo(N, NewValue);
// Replace uses of the original load (before extension)
@@ -6273,6 +6890,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
LegalOperations))
@@ -6281,8 +6899,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// fold (sext (sext x)) -> (sext x)
// fold (sext (aext x)) -> (sext x)
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
- return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
- N0.getOperand(0));
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (sext (truncate (load x))) -> (sext (smaller load x))
@@ -6314,12 +6931,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
// bits, just sext from i32.
if (NumSignBits > OpBits-MidBits)
- return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
} else {
// Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
// bits, just truncate to i32.
if (NumSignBits > OpBits-MidBits)
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
}
// fold (sext (truncate x)) -> (sextinreg x).
@@ -6329,7 +6946,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
else if (OpBits > DestBits)
Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
DAG.getValueType(N0.getValueType()));
}
}
@@ -6349,16 +6966,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
- LN0->getChain(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
- ISD::SIGN_EXTEND);
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -6376,8 +6991,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
- LN0->getChain(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
LN0->getBasePtr(), MemVT,
LN0->getMemOperand());
CombineTo(N, ExtLoad);
@@ -6411,7 +7025,6 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
LN0->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.sext(VT.getSizeInBits());
- SDLoc DL(N);
SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
ExtLoad, DAG.getConstant(Mask, DL, VT));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
@@ -6419,24 +7032,27 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
N0.getOperand(0).getValueType(), ExtLoad);
CombineTo(N, And);
CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
- ISD::SIGN_EXTEND);
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
}
if (N0.getOpcode() == ISD::SETCC) {
- EVT N0VT = N0.getOperand(0).getValueType();
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ EVT N00VT = N0.getOperand(0).getValueType();
+
// sext(setcc) -> sext_in_reg(vsetcc) for vectors.
// Only do this before legalize for now.
if (VT.isVector() && !LegalOperations &&
- TLI.getBooleanContents(N0VT) ==
+ TLI.getBooleanContents(N00VT) ==
TargetLowering::ZeroOrNegativeOneBooleanContent) {
// On some architectures (such as SSE/NEON/etc) the SETCC result type is
// of the same size as the compared operands. Only optimize sext(setcc())
// if this is the case.
- EVT SVT = getSetCCResultType(N0VT);
+ EVT SVT = getSetCCResultType(N00VT);
// We know that the # elements of the results is the same as the
// # elements of the compare (and the # elements of the compare result
@@ -6444,19 +7060,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
if (VT.getSizeInBits() == SVT.getSizeInBits())
- return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSetCC(DL, VT, N00, N01, CC);
// If the desired elements are smaller or larger than the source
- // elements we can use a matching integer vector type and then
- // truncate/sign extend
- EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
- if (SVT == MatchingVectorType) {
- SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
- N0.getOperand(0), N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
+ // elements, we can use a matching integer vector type and then
+ // truncate/sign extend.
+ EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
+ if (SVT == MatchingVecType) {
+ SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
+ return DAG.getSExtOrTrunc(VsetCC, DL, VT);
}
}
@@ -6465,36 +7077,30 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// getBooleanContents().
unsigned SetCCWidth = N0.getScalarValueSizeInBits();
- SDLoc DL(N);
// To determine the "true" side of the select, we need to know the high bit
// of the value returned by the setcc if it evaluates to true.
// If the type of the setcc is i1, then the true case of the select is just
// sext(i1 1), that is, -1.
// If the type of the setcc is larger (say, i8) then the value of the high
- // bit depends on getBooleanContents(). So, ask TLI for a real "true" value
+ // bit depends on getBooleanContents(), so ask TLI for a real "true" value
// of the appropriate width.
- SDValue ExtTrueVal =
- (SetCCWidth == 1)
- ? DAG.getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()),
- DL, VT)
- : TLI.getConstTrueVal(DAG, VT, DL);
-
- if (SDValue SCC = SimplifySelectCC(
- DL, N0.getOperand(0), N0.getOperand(1), ExtTrueVal,
- DAG.getConstant(0, DL, VT),
- cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
+ SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT)
+ : TLI.getConstTrueVal(DAG, VT, DL);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ if (SDValue SCC =
+ SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
return SCC;
if (!VT.isVector()) {
- EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
- if (!LegalOperations ||
- TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) {
- SDLoc DL(N);
- ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- SDValue SetCC =
- DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC);
- return DAG.getSelect(DL, VT, SetCC, ExtTrueVal,
- DAG.getConstant(0, DL, VT));
+ EVT SetCCVT = getSetCCResultType(N00VT);
+ // Don't do this transform for i1 because there's a select transform
+ // that would reverse it.
+ // TODO: We should not do this transform at all without a target hook
+ // because a sext is likely cheaper than a select?
+ if (SetCCVT.getScalarSizeInBits() != 1 &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
+ SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
+ return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
}
}
}
@@ -6502,7 +7108,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// fold (sext x) -> (zext x) if the sign bit is known zero.
if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
DAG.SignBitIsZero(N0))
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
return SDValue();
}
@@ -6677,13 +7283,14 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
- CombineTo(N, ExtLoad);
+
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
ISD::ZERO_EXTEND);
+ CombineTo(N, ExtLoad);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -6991,9 +7598,25 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitAssertZext(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT EVT = cast<VTSDNode>(N1)->getVT();
+
+ // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt)
+ if (N0.getOpcode() == ISD::AssertZext &&
+ EVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
+ return N0;
+
+ return SDValue();
+}
+
/// See if the specified operand can be simplified with the knowledge that only
/// the bits specified by Mask are used. If so, return the simpler operand,
/// otherwise return a null SDValue.
+///
+/// (This exists alongside SimplifyDemandedBits because GetDemandedBits can
+/// simplify nodes with multiple uses more aggressively.)
SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
switch (V.getOpcode()) {
default: break;
@@ -7029,6 +7652,14 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
SimplifyLHS, V.getOperand(1));
}
+ break;
+ case ISD::AND: {
+ // X & -1 -> X (ignoring bits which aren't demanded).
+ ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1));
+ if (AndVal && (AndVal->getAPIntValue() & Mask) == Mask)
+ return V.getOperand(0);
+ break;
+ }
}
return SDValue();
}
@@ -7244,6 +7875,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
}
+ // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x)
+ if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
+ N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
+ return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
+ }
+
// fold (sext_in_reg (zext x)) -> (sext x)
// iff we are extending the source sign bit.
if (N0.getOpcode() == ISD::ZERO_EXTEND) {
@@ -7254,7 +7895,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
}
// fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
- if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
+ if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
// fold operands of sext_in_reg based on knowledge that the top bits are not
@@ -7496,6 +8137,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
VT.getSizeInBits())))
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
}
+
// fold (truncate (load x)) -> (smaller load x)
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
@@ -7517,6 +8159,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
}
+
// fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
// where ... are all 'undef'.
if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
@@ -7582,6 +8225,18 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
+ // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
+ // When the adde's carry is not used.
+ if (N0.getOpcode() == ISD::ADDE && N0.hasOneUse() &&
+ !N0.getNode()->hasAnyUseOfValue(1) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::ADDE, VT))) {
+ SDLoc SL(N);
+ auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
+ auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
+ return DAG.getNode(ISD::ADDE, SL, DAG.getVTList(VT, MVT::Glue),
+ X, Y, N0.getOperand(2));
+ }
+
return SDValue();
}
@@ -7672,6 +8327,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
// If the input is a BUILD_VECTOR with all constant elements, fold this now.
// Only do this before legalize, since afterward the target may be depending
// on the bitconvert.
@@ -8040,6 +8698,11 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
return DAG.getBuildVector(VT, DL, Ops);
}
+static bool isContractable(SDNode *N) {
+ SDNodeFlags F = cast<BinaryWithFlagsSDNode>(N)->Flags;
+ return F.hasAllowContract() || F.hasUnsafeAlgebra();
+}
+
/// Try to perform FMA combining on a given FADD node.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
@@ -8048,24 +8711,27 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDLoc SL(N);
const TargetOptions &Options = DAG.getTarget().Options;
- bool AllowFusion =
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
// Floating-point multiply-add with intermediate rounding.
bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
+ bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath || HasFMAD);
+ // If the addition is not contractable, do not combine.
+ if (!AllowFusionGlobally && !isContractable(N))
+ return SDValue();
+
const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
- ;
- if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
+ if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
return SDValue();
// Always prefer FMAD to FMA for precision.
@@ -8073,35 +8739,39 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
bool LookThroughFPExt = TLI.isFPExtFree(VT);
+ // Is the node an FMUL and contractable either due to global flags or
+ // SDNodeFlags.
+ auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
+ if (N.getOpcode() != ISD::FMUL)
+ return false;
+ return AllowFusionGlobally || isContractable(N.getNode());
+ };
// If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
// prefer to fold the multiply with fewer uses.
- if (Aggressive && N0.getOpcode() == ISD::FMUL &&
- N1.getOpcode() == ISD::FMUL) {
+ if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
if (N0.getNode()->use_size() > N1.getNode()->use_size())
std::swap(N0, N1);
}
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
- if (N0.getOpcode() == ISD::FMUL &&
- (Aggressive || N0->hasOneUse())) {
+ if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1), N1);
}
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FMUL &&
- (Aggressive || N1->hasOneUse())) {
+ if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(0), N1.getOperand(1), N0);
}
// Look through FP_EXTEND nodes to do more combining.
- if (AllowFusion && LookThroughFPExt) {
+ if (LookThroughFPExt) {
// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N00))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
@@ -8113,7 +8783,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// Note: Commutes FADD operands.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N10))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N10.getOperand(0)),
@@ -8154,7 +8824,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N0));
}
- if (AllowFusion && LookThroughFPExt) {
+ if (LookThroughFPExt) {
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), z))
auto FoldFAddFMAFPExtFMul = [&] (
@@ -8169,7 +8839,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
- if (N020.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N020))
return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
N020.getOperand(0), N020.getOperand(1),
N1);
@@ -8195,7 +8865,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == PreferredFusedOpcode) {
SDValue N002 = N00.getOperand(2);
- if (N002.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N002))
return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
N002.getOperand(0), N002.getOperand(1),
N1);
@@ -8208,7 +8878,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N12 = N1.getOperand(2);
if (N12.getOpcode() == ISD::FP_EXTEND) {
SDValue N120 = N12.getOperand(0);
- if (N120.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N120))
return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
N120.getOperand(0), N120.getOperand(1),
N0);
@@ -8224,7 +8894,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N10 = N1.getOperand(0);
if (N10.getOpcode() == PreferredFusedOpcode) {
SDValue N102 = N10.getOperand(2);
- if (N102.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N102))
return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
N102.getOperand(0), N102.getOperand(1),
N0);
@@ -8244,23 +8914,26 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDLoc SL(N);
const TargetOptions &Options = DAG.getTarget().Options;
- bool AllowFusion =
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
-
// Floating-point multiply-add with intermediate rounding.
bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
+ bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath || HasFMAD);
+ // If the subtraction is not contractable, do not combine.
+ if (!AllowFusionGlobally && !isContractable(N))
+ return SDValue();
+
const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
- if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
+ if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
return SDValue();
// Always prefer FMAD to FMA for precision.
@@ -8268,9 +8941,16 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
bool LookThroughFPExt = TLI.isFPExtFree(VT);
+ // Is the node an FMUL and contractable either due to global flags or
+ // SDNodeFlags.
+ auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
+ if (N.getOpcode() != ISD::FMUL)
+ return false;
+ return AllowFusionGlobally || isContractable(N.getNode());
+ };
+
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
- if (N0.getOpcode() == ISD::FMUL &&
- (Aggressive || N0->hasOneUse())) {
+ if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(ISD::FNEG, SL, VT, N1));
@@ -8278,16 +8958,14 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
// Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FMUL &&
- (Aggressive || N1->hasOneUse()))
+ if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse()))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
N1.getOperand(0)),
N1.getOperand(1), N0);
// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
- if (N0.getOpcode() == ISD::FNEG &&
- N0.getOperand(0).getOpcode() == ISD::FMUL &&
+ if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
(Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
SDValue N00 = N0.getOperand(0).getOperand(0);
SDValue N01 = N0.getOperand(0).getOperand(1);
@@ -8297,12 +8975,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// Look through FP_EXTEND nodes to do more combining.
- if (AllowFusion && LookThroughFPExt) {
+ if (LookThroughFPExt) {
// fold (fsub (fpext (fmul x, y)), z)
// -> (fma (fpext x), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N00))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
@@ -8316,7 +8994,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// Note: Commutes FSUB operands.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N10))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -8336,7 +9014,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FNEG) {
SDValue N000 = N00.getOperand(0);
- if (N000.getOpcode() == ISD::FMUL) {
+ if (isContractableFMUL(N000)) {
return DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -8358,7 +9036,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FP_EXTEND) {
SDValue N000 = N00.getOperand(0);
- if (N000.getOpcode() == ISD::FMUL) {
+ if (isContractableFMUL(N000)) {
return DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -8378,10 +9056,9 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// -> (fma x, y (fma u, v, (fneg z)))
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
// are currently only supported on binary nodes.
- if (Options.UnsafeFPMath &&
- N0.getOpcode() == PreferredFusedOpcode &&
- N0.getOperand(2).getOpcode() == ISD::FMUL &&
- N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
+ if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
+ isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
+ N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8395,9 +9072,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// -> (fma (fneg y), z, (fma (fneg u), v, x))
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
// are currently only supported on binary nodes.
- if (Options.UnsafeFPMath &&
- N1.getOpcode() == PreferredFusedOpcode &&
- N1.getOperand(2).getOpcode() == ISD::FMUL) {
+ if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
+ isContractableFMUL(N1.getOperand(2))) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8410,14 +9086,14 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N21, N0));
}
- if (AllowFusion && LookThroughFPExt) {
+ if (LookThroughFPExt) {
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
if (N0.getOpcode() == PreferredFusedOpcode) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
- if (N020.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N020))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8440,7 +9116,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == PreferredFusedOpcode) {
SDValue N002 = N00.getOperand(2);
- if (N002.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N002))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
@@ -8461,7 +9137,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (N1.getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
SDValue N120 = N1.getOperand(2).getOperand(0);
- if (N120.getOpcode() == ISD::FMUL) {
+ if (isContractableFMUL(N120)) {
SDValue N1200 = N120.getOperand(0);
SDValue N1201 = N120.getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8488,7 +9164,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N100 = N1.getOperand(0).getOperand(0);
SDValue N101 = N1.getOperand(0).getOperand(1);
SDValue N102 = N1.getOperand(0).getOperand(2);
- if (N102.getOpcode() == ISD::FMUL) {
+ if (isContractableFMUL(N102)) {
SDValue N1020 = N102.getOperand(0);
SDValue N1021 = N102.getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8624,6 +9300,9 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
@@ -8637,7 +9316,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
GetNegatedExpression(N0, DAG, LegalOperations), Flags);
// FIXME: Auto-upgrade the target/function-level option.
- if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) {
+ if (Options.NoSignedZerosFPMath || N->getFlags()->hasNoSignedZeros()) {
// fold (fadd A, 0) -> A
if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
if (N1C->isZero())
@@ -8771,13 +9450,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return DAG.getNode(ISD::FADD, DL, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations), Flags);
// FIXME: Auto-upgrade the target/function-level option.
- if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) {
+ if (Options.NoSignedZerosFPMath || N->getFlags()->hasNoSignedZeros()) {
// (fsub 0, B) -> -B
if (N0CFP && N0CFP->isZero()) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
@@ -8850,6 +9532,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (N1CFP && N1CFP->isExactlyValue(1.0))
return N0;
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
if (Options.UnsafeFPMath) {
// fold (fmul A, 0) -> 0
if (N1CFP && N1CFP->isZero())
@@ -9104,6 +9789,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
if (Options.UnsafeFPMath) {
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
if (N1CFP) {
@@ -9207,6 +9895,9 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1,
&cast<BinaryWithFlagsSDNode>(N)->Flags);
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
return SDValue();
}
@@ -10361,7 +11052,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
dbgs() << "\n");
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
-
+ AddUsersToWorklist(Chain.getNode());
if (N->use_empty())
deleteAndRecombine(N);
@@ -10414,7 +11105,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
if (PrevST->getBasePtr() == Ptr &&
PrevST->getValue().getValueType() == N->getValueType(0))
- return CombineTo(N, Chain.getOperand(1), Chain);
+ return CombineTo(N, PrevST->getOperand(1), Chain);
}
}
@@ -10432,14 +11123,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
}
}
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
-#ifndef NDEBUG
- if (CombinerAAOnlyFunc.getNumOccurrences() &&
- CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
- UseAA = false;
-#endif
- if (UseAA && LD->isUnindexed()) {
+ if (LD->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
@@ -11021,6 +11705,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
ArgChains);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
+ AddToWorklist(Chain.getNode());
return true;
}
@@ -11414,18 +12099,24 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
return false;
}
-SDValue DAGCombiner::getMergedConstantVectorStore(
- SelectionDAG &DAG, const SDLoc &SL, ArrayRef<MemOpLink> Stores,
- SmallVectorImpl<SDValue> &Chains, EVT Ty) const {
- SmallVector<SDValue, 8> BuildVector;
+SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumStores) {
+ SmallVector<SDValue, 8> Chains;
+ SmallPtrSet<const SDNode *, 8> Visited;
+ SDLoc StoreDL(StoreNodes[0].MemNode);
+
+ for (unsigned i = 0; i < NumStores; ++i) {
+ Visited.insert(StoreNodes[i].MemNode);
+ }
- for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
- StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode);
- Chains.push_back(St->getChain());
- BuildVector.push_back(St->getValue());
+ // don't include nodes that are children
+ for (unsigned i = 0; i < NumStores; ++i) {
+ if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
+ Chains.push_back(StoreNodes[i].MemNode->getChain());
}
- return DAG.getBuildVector(Ty, SL, BuildVector);
+ assert(Chains.size() > 0 && "Chain should have generated a chain");
+ return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
}
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
@@ -11436,22 +12127,8 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
return false;
int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned LatestNodeUsed = 0;
-
- for (unsigned i=0; i < NumStores; ++i) {
- // Find a chain for the new wide-store operand. Notice that some
- // of the store nodes that we found may not be selected for inclusion
- // in the wide store. The chain we use needs to be the chain of the
- // latest store node which is *used* and replaced by the wide store.
- if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
- LatestNodeUsed = i;
- }
-
- SmallVector<SDValue, 8> Chains;
// The latest Node in the DAG.
- LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
SDLoc DL(StoreNodes[0].MemNode);
SDValue StoredVal;
@@ -11467,7 +12144,18 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
if (IsConstantSrc) {
- StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty);
+ SmallVector<SDValue, 8> BuildVector;
+ for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
+ SDValue Val = St->getValue();
+ if (MemVT.getScalarType().isInteger())
+ if (auto *CFP = dyn_cast<ConstantFPSDNode>(St->getValue()))
+ Val = DAG.getConstant(
+ (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(),
+ SDLoc(CFP), MemVT);
+ BuildVector.push_back(Val);
+ }
+ StoredVal = DAG.getBuildVector(Ty, DL, BuildVector);
} else {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumStores; ++i) {
@@ -11477,7 +12165,6 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
if (Val.getValueType() != MemVT)
return false;
Ops.push_back(Val);
- Chains.push_back(St->getChain());
}
// Build the extracted vector elements back into a vector.
@@ -11497,7 +12184,6 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
for (unsigned i = 0; i < NumStores; ++i) {
unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
- Chains.push_back(St->getChain());
SDValue Val = St->getValue();
StoreInt <<= ElementSizeBytes * 8;
@@ -11515,54 +12201,27 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
}
- assert(!Chains.empty());
-
- SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
FirstInChain->getAlignment());
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
- if (UseAA) {
- // Replace all merged stores with the new store.
- for (unsigned i = 0; i < NumStores; ++i)
- CombineTo(StoreNodes[i].MemNode, NewStore);
- } else {
- // Replace the last store with the new store.
- CombineTo(LatestOp, NewStore);
- // Erase all other stores.
- for (unsigned i = 0; i < NumStores; ++i) {
- if (StoreNodes[i].MemNode == LatestOp)
- continue;
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- // ReplaceAllUsesWith will replace all uses that existed when it was
- // called, but graph optimizations may cause new ones to appear. For
- // example, the case in pr14333 looks like
- //
- // St's chain -> St -> another store -> X
- //
- // And the only difference from St to the other store is the chain.
- // When we change it's chain to be St's chain they become identical,
- // get CSEed and the net result is that X is now a use of St.
- // Since we know that St is redundant, just iterate.
- while (!St->use_empty())
- DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
- deleteAndRecombine(St);
- }
- }
+ // Replace all merged stores with the new store.
+ for (unsigned i = 0; i < NumStores; ++i)
+ CombineTo(StoreNodes[i].MemNode, NewStore);
- StoreNodes.erase(StoreNodes.begin() + NumStores, StoreNodes.end());
+ AddToWorklist(NewChain.getNode());
return true;
}
-void DAGCombiner::getStoreMergeAndAliasCandidates(
- StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
- SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
+void DAGCombiner::getStoreMergeCandidates(
+ StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
+ EVT MemVT = St->getMemoryVT();
// We must have a base and an offset.
if (!BasePtr.Base.getNode())
@@ -11572,104 +12231,71 @@ void DAGCombiner::getStoreMergeAndAliasCandidates(
if (BasePtr.Base.isUndef())
return;
- // Walk up the chain and look for nodes with offsets from the same
- // base pointer. Stop when reaching an instruction with a different kind
- // or instruction which has a different base pointer.
- EVT MemVT = St->getMemoryVT();
- unsigned Seq = 0;
- StoreSDNode *Index = St;
-
-
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
-
- if (UseAA) {
- // Look at other users of the same chain. Stores on the same chain do not
- // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized
- // to be on the same chain, so don't bother looking at adjacent chains.
-
- SDValue Chain = St->getChain();
- for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) {
- if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
- if (I.getOperandNo() != 0)
- continue;
-
- if (OtherST->isVolatile() || OtherST->isIndexed())
- continue;
-
- if (OtherST->getMemoryVT() != MemVT)
- continue;
-
- BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr(), DAG);
-
- if (Ptr.equalBaseIndex(BasePtr))
- StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++));
- }
- }
-
- return;
- }
-
- while (Index) {
- // If the chain has more than one use, then we can't reorder the mem ops.
- if (Index != St && !SDValue(Index, 0)->hasOneUse())
- break;
-
- // Find the base pointer and offset for this memory node.
- BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
-
- // Check that the base pointer is the same as the original one.
- if (!Ptr.equalBaseIndex(BasePtr))
- break;
-
- // The memory operands must not be volatile.
- if (Index->isVolatile() || Index->isIndexed())
- break;
-
- // No truncation.
- if (Index->isTruncatingStore())
- break;
-
- // The stored memory type must be the same.
- if (Index->getMemoryVT() != MemVT)
- break;
-
- // We do not allow under-aligned stores in order to prevent
- // overriding stores. NOTE: this is a bad hack. Alignment SHOULD
- // be irrelevant here; what MATTERS is that we not move memory
- // operations that potentially overlap past each-other.
- if (Index->getAlignment() < MemVT.getStoreSize())
- break;
-
- // We found a potential memory operand to merge.
- StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
-
- // Find the next memory operand in the chain. If the next operand in the
- // chain is a store then move up and continue the scan with the next
- // memory operand. If the next operand is a load save it and use alias
- // information to check if it interferes with anything.
- SDNode *NextInChain = Index->getChain().getNode();
- while (1) {
- if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
- // We found a store node. Use it for the next iteration.
- Index = STn;
- break;
- } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
- if (Ldn->isVolatile()) {
- Index = nullptr;
- break;
+ bool IsLoadSrc = isa<LoadSDNode>(St->getValue());
+ bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) ||
+ isa<ConstantFPSDNode>(St->getValue());
+ bool IsExtractVecSrc =
+ (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
+ auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr) -> bool {
+ if (Other->isVolatile() || Other->isIndexed())
+ return false;
+ // We can merge constant floats to equivalent integers
+ if (Other->getMemoryVT() != MemVT)
+ if (!(MemVT.isInteger() && MemVT.bitsEq(Other->getMemoryVT()) &&
+ isa<ConstantFPSDNode>(Other->getValue())))
+ return false;
+ if (IsLoadSrc)
+ if (!isa<LoadSDNode>(Other->getValue()))
+ return false;
+ if (IsConstantSrc)
+ if (!(isa<ConstantSDNode>(Other->getValue()) ||
+ isa<ConstantFPSDNode>(Other->getValue())))
+ return false;
+ if (IsExtractVecSrc)
+ if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR))
+ return false;
+ Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
+ return (Ptr.equalBaseIndex(BasePtr));
+ };
+ // We looking for a root node which is an ancestor to all mergable
+ // stores. We search up through a load, to our root and then down
+ // through all children. For instance we will find Store{1,2,3} if
+ // St is Store1, Store2. or Store3 where the root is not a load
+ // which always true for nonvolatile ops. TODO: Expand
+ // the search to find all valid candidates through multiple layers of loads.
+ //
+ // Root
+ // |-------|-------|
+ // Load Load Store3
+ // | |
+ // Store1 Store2
+ //
+ // FIXME: We should be able to climb and
+ // descend TokenFactors to find candidates as well.
+
+ SDNode *RootNode = (St->getChain()).getNode();
+
+ if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
+ RootNode = Ldn->getChain().getNode();
+ for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
+ if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
+ for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
+ if (I2.getOperandNo() == 0)
+ if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
+ BaseIndexOffset Ptr;
+ if (CandidateMatch(OtherST, Ptr))
+ StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset));
+ }
+ } else
+ for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
+ if (I.getOperandNo() == 0)
+ if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
+ BaseIndexOffset Ptr;
+ if (CandidateMatch(OtherST, Ptr))
+ StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset));
}
-
- // Save the load node for later. Continue the scan.
- AliasLoadNodes.push_back(Ldn);
- NextInChain = Ldn->getChain().getNode();
- continue;
- } else {
- Index = nullptr;
- break;
- }
- }
- }
}
// We need to check that merging these stores does not cause a loop
@@ -11678,31 +12304,34 @@ void DAGCombiner::getStoreMergeAndAliasCandidates(
// through the chain). Check in parallel by searching up from
// non-chain operands of candidates.
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
- SmallVectorImpl<MemOpLink> &StoreNodes) {
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) {
SmallPtrSet<const SDNode *, 16> Visited;
SmallVector<const SDNode *, 8> Worklist;
// search ops of store candidates
- for (unsigned i = 0; i < StoreNodes.size(); ++i) {
+ for (unsigned i = 0; i < NumStores; ++i) {
SDNode *n = StoreNodes[i].MemNode;
// Potential loops may happen only through non-chain operands
for (unsigned j = 1; j < n->getNumOperands(); ++j)
Worklist.push_back(n->getOperand(j).getNode());
}
// search through DAG. We can stop early if we find a storenode
- for (unsigned i = 0; i < StoreNodes.size(); ++i) {
+ for (unsigned i = 0; i < NumStores; ++i) {
if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist))
return false;
}
return true;
}
-bool DAGCombiner::MergeConsecutiveStores(
- StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes) {
+bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (OptLevel == CodeGenOpt::None)
return false;
EVT MemVT = St->getMemoryVT();
int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
+
+ if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
+ return false;
+
bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
Attribute::NoImplicitFloat);
@@ -11731,145 +12360,137 @@ bool DAGCombiner::MergeConsecutiveStores(
if (MemVT.isVector() && IsLoadSrc)
return false;
- // Only look at ends of store sequences.
- SDValue Chain = SDValue(St, 0);
- if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
- return false;
-
- // Save the LoadSDNodes that we find in the chain.
- // We need to make sure that these nodes do not interfere with
- // any of the store nodes.
- SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
-
- getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
+ SmallVector<MemOpLink, 8> StoreNodes;
+ // Find potential store merge candidates by searching through chain sub-DAG
+ getStoreMergeCandidates(St, StoreNodes);
// Check if there is anything to merge.
if (StoreNodes.size() < 2)
return false;
- // only do dependence check in AA case
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
- if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes))
- return false;
-
// Sort the memory operands according to their distance from the
- // base pointer. As a secondary criteria: make sure stores coming
- // later in the code come first in the list. This is important for
- // the non-UseAA case, because we're merging stores into the FINAL
- // store along a chain which potentially contains aliasing stores.
- // Thus, if there are multiple stores to the same address, the last
- // one can be considered for merging but not the others.
+ // base pointer.
std::sort(StoreNodes.begin(), StoreNodes.end(),
[](MemOpLink LHS, MemOpLink RHS) {
- return LHS.OffsetFromBase < RHS.OffsetFromBase ||
- (LHS.OffsetFromBase == RHS.OffsetFromBase &&
- LHS.SequenceNum < RHS.SequenceNum);
- });
+ return LHS.OffsetFromBase < RHS.OffsetFromBase;
+ });
// Scan the memory operations on the chain and find the first non-consecutive
// store memory address.
- unsigned LastConsecutiveStore = 0;
+ unsigned NumConsecutiveStores = 0;
int64_t StartAddress = StoreNodes[0].OffsetFromBase;
- for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
- // Check that the addresses are consecutive starting from the second
- // element in the list of stores.
- if (i > 0) {
- int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
- if (CurrAddress - StartAddress != (ElementSizeBytes * i))
- break;
- }
-
- // Check if this store interferes with any of the loads that we found.
- // If we find a load that alias with this store. Stop the sequence.
- if (any_of(AliasLoadNodes, [&](LSBaseSDNode *Ldn) {
- return isAlias(Ldn, StoreNodes[i].MemNode);
- }))
+ // Check that the addresses are consecutive starting from the second
+ // element in the list of stores.
+ for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
+ int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
break;
-
- // Mark this node as useful.
- LastConsecutiveStore = i;
+ NumConsecutiveStores = i + 1;
}
+ if (NumConsecutiveStores < 2)
+ return false;
+
+ // Check that we can merge these candidates without causing a cycle
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumConsecutiveStores))
+ return false;
+
+
// The node with the lowest store address.
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
LLVMContext &Context = *DAG.getContext();
const DataLayout &DL = DAG.getDataLayout();
// Store the constants into memory as one consecutive store.
if (IsConstantSrc) {
- unsigned LastLegalType = 0;
- unsigned LastLegalVectorType = 0;
- bool NonZero = false;
- for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue StoredVal = St->getValue();
-
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
- NonZero |= !C->isNullValue();
- } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
- NonZero |= !C->getConstantFPValue()->isNullValue();
- } else {
- // Non-constant.
- break;
- }
+ bool RV = false;
+ while (NumConsecutiveStores > 1) {
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ unsigned LastLegalType = 0;
+ unsigned LastLegalVectorType = 0;
+ bool NonZero = false;
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue StoredVal = ST->getValue();
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
+ NonZero |= !C->isNullValue();
+ } else if (ConstantFPSDNode *C =
+ dyn_cast<ConstantFPSDNode>(StoredVal)) {
+ NonZero |= !C->getConstantFPValue()->isNullValue();
+ } else {
+ // Non-constant.
+ break;
+ }
- // Find a legal type for the constant store.
- unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
- EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
- bool IsFast;
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFast) && IsFast) {
- LastLegalType = i+1;
- // Or check whether a truncstore is legal.
- } else if (TLI.getTypeAction(Context, StoreTy) ==
- TargetLowering::TypePromoteInteger) {
- EVT LegalizedStoredValueTy =
- TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
- if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
- TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
- FirstStoreAS, FirstStoreAlign, &IsFast) &&
+ // Find a legal type for the constant store.
+ unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
+ EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ bool IsFast = false;
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFast) &&
IsFast) {
LastLegalType = i + 1;
+ // Or check whether a truncstore is legal.
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValueTy =
+ TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
+ if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
+ TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
+ FirstStoreAS, FirstStoreAlign, &IsFast) &&
+ IsFast) {
+ LastLegalType = i + 1;
+ }
}
- }
- // We only use vectors if the constant is known to be zero or the target
- // allows it and the function is not marked with the noimplicitfloat
- // attribute.
- if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1,
- FirstStoreAS)) &&
- !NoVectors) {
- // Find a legal type for the vector store.
- EVT Ty = EVT::getVectorVT(Context, MemVT, i+1);
- if (TLI.isTypeLegal(Ty) &&
- TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
- FirstStoreAlign, &IsFast) && IsFast)
- LastLegalVectorType = i + 1;
+ // We only use vectors if the constant is known to be zero or the target
+ // allows it and the function is not marked with the noimplicitfloat
+ // attribute.
+ if ((!NonZero ||
+ TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
+ !NoVectors) {
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1);
+ if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(Ty) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
+ FirstStoreAlign, &IsFast) &&
+ IsFast)
+ LastLegalVectorType = i + 1;
+ }
}
- }
- // Check if we found a legal integer type to store.
- if (LastLegalType == 0 && LastLegalVectorType == 0)
- return false;
+ // Check if we found a legal integer type that creates a meaningful merge.
+ if (LastLegalType < 2 && LastLegalVectorType < 2)
+ break;
- bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
- unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
+ bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
+ unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
- return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
- true, UseVector);
+ bool Merged = MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
+ true, UseVector);
+ if (!Merged)
+ break;
+ // Remove merged stores for next iteration.
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ RV = true;
+ NumConsecutiveStores -= NumElem;
+ }
+ return RV;
}
// When extracting multiple vector elements, try to store them
// in one vector store rather than a sequence of scalar stores.
if (IsExtractVecSrc) {
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
unsigned NumStoresToMerge = 0;
bool IsVec = MemVT.isVector();
- for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
unsigned StoreValOpcode = St->getValue().getOpcode();
// This restriction could be loosened.
@@ -11909,7 +12530,7 @@ bool DAGCombiner::MergeConsecutiveStores(
// Find acceptable loads. Loads need to have the same chain (token factor),
// must not be zext, volatile, indexed, and they must be consecutive.
BaseIndexOffset LdBasePtr;
- for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
if (!Ld) break;
@@ -11942,7 +12563,7 @@ bool DAGCombiner::MergeConsecutiveStores(
}
// We found a potential memory operand to merge.
- LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
+ LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset));
}
if (LoadNodes.size() < 2)
@@ -11954,7 +12575,9 @@ bool DAGCombiner::MergeConsecutiveStores(
if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
St->getAlignment() >= RequiredAlignment)
return false;
-
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
unsigned FirstLoadAS = FirstLoad->getAddressSpace();
unsigned FirstLoadAlign = FirstLoad->getAlignment();
@@ -12023,31 +12646,12 @@ bool DAGCombiner::MergeConsecutiveStores(
// We add +1 here because the LastXXX variables refer to location while
// the NumElem refers to array/index size.
- unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
+ unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
NumElem = std::min(LastLegalType, NumElem);
if (NumElem < 2)
return false;
- // Collect the chains from all merged stores.
- SmallVector<SDValue, 8> MergeStoreChains;
- MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain());
-
- // The latest Node in the DAG.
- unsigned LatestNodeUsed = 0;
- for (unsigned i=1; i<NumElem; ++i) {
- // Find a chain for the new wide-store operand. Notice that some
- // of the store nodes that we found may not be selected for inclusion
- // in the wide store. The chain we use needs to be the chain of the
- // latest store node which is *used* and replaced by the wide store.
- if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
- LatestNodeUsed = i;
-
- MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
- }
-
- LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
-
// Find if it is better to use vectors or integers to load and store
// to memory.
EVT JointMemOpVT;
@@ -12067,8 +12671,9 @@ bool DAGCombiner::MergeConsecutiveStores(
FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), FirstLoadAlign);
- SDValue NewStoreChain =
- DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains);
+ SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
+
+ AddToWorklist(NewStoreChain.getNode());
SDValue NewStore =
DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
@@ -12081,25 +12686,9 @@ bool DAGCombiner::MergeConsecutiveStores(
SDValue(NewLoad.getNode(), 1));
}
- if (UseAA) {
- // Replace the all stores with the new store.
- for (unsigned i = 0; i < NumElem; ++i)
- CombineTo(StoreNodes[i].MemNode, NewStore);
- } else {
- // Replace the last store with the new store.
- CombineTo(LatestOp, NewStore);
- // Erase all other stores.
- for (unsigned i = 0; i < NumElem; ++i) {
- // Remove all Store nodes.
- if (StoreNodes[i].MemNode == LatestOp)
- continue;
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
- deleteAndRecombine(St);
- }
- }
-
- StoreNodes.erase(StoreNodes.begin() + NumElem, StoreNodes.end());
+ // Replace the all stores with the new store.
+ for (unsigned i = 0; i < NumElem; ++i)
+ CombineTo(StoreNodes[i].MemNode, NewStore);
return true;
}
@@ -12256,19 +12845,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (SDValue NewST = TransformFPLoadStorePair(N))
return NewST;
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
-#ifndef NDEBUG
- if (CombinerAAOnlyFunc.getNumOccurrences() &&
- CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
- UseAA = false;
-#endif
- if (UseAA && ST->isUnindexed()) {
- // FIXME: We should do this even without AA enabled. AA will just allow
- // FindBetterChain to work in more situations. The problem with this is that
- // any combine that expects memory operations to be on consecutive chains
- // first needs to be updated to look for users of the same chain.
-
+ if (ST->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes, on this store and any
// adjacent stores.
if (findBetterNeighborChains(ST)) {
@@ -12302,8 +12879,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (SimplifyDemandedBits(
Value,
APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
- ST->getMemoryVT().getScalarSizeInBits())))
+ ST->getMemoryVT().getScalarSizeInBits()))) {
+ // Re-visit the store if anything changed and the store hasn't been merged
+ // with another node (N is deleted) SimplifyDemandedBits will add Value's
+ // node back to the worklist if necessary, but we also need to re-visit
+ // the Store node itself.
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
return SDValue(N, 0);
+ }
}
// If this is a load followed by a store to the same location, then the store
@@ -12347,15 +12931,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// There can be multiple store sequences on the same chain.
// Keep trying to merge store sequences until we are unable to do so
// or until we merge the last store on the chain.
- SmallVector<MemOpLink, 8> StoreNodes;
- bool Changed = MergeConsecutiveStores(ST, StoreNodes);
+ bool Changed = MergeConsecutiveStores(ST);
if (!Changed) break;
-
- if (any_of(StoreNodes,
- [ST](const MemOpLink &Link) { return Link.MemNode == ST; })) {
- // ST has been merged and no longer exists.
+ // Return N as merge only uses CombineTo and no worklist clean
+ // up is necessary.
+ if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
return SDValue(N, 0);
- }
}
}
@@ -12364,7 +12945,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Make sure to do this only after attempting to merge stores in order to
// avoid changing the types of some subset of stores due to visit order,
// preventing their merging.
- if (isa<ConstantFPSDNode>(Value)) {
+ if (isa<ConstantFPSDNode>(ST->getValue())) {
if (SDValue NewSt = replaceStoreOfFPConstant(ST))
return NewSt;
}
@@ -12493,10 +13074,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
EVT VT = InVec.getValueType();
- // If we can't generate a legal BUILD_VECTOR, exit
- if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
- return SDValue();
-
// Check that we know which element is being inserted
if (!isa<ConstantSDNode>(EltNo))
return SDValue();
@@ -12523,6 +13100,10 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
}
}
+ // If we can't generate a legal BUILD_VECTOR, exit
+ if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return SDValue();
+
// Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
// be converted to a BUILD_VECTOR). Fill in the Ops vector with the
// vector elements.
@@ -12544,11 +13125,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
// All the operands of BUILD_VECTOR must have the same type;
// we enforce that here.
EVT OpVT = Ops[0].getValueType();
- if (InVal.getValueType() != OpVT)
- InVal = OpVT.bitsGT(InVal.getValueType()) ?
- DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
- DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
- Ops[Elt] = InVal;
+ Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
}
// Return the new vector
@@ -12568,6 +13145,11 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
return SDValue();
+ ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
+ ISD::NON_EXTLOAD : ISD::EXTLOAD;
+ if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
+ return SDValue();
+
Align = NewAlign;
SDValue NewPtr = OriginalLoad->getBasePtr();
@@ -12639,6 +13221,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
EVT VT = InVec.getValueType();
EVT NVT = N->getValueType(0);
+ if (InVec.isUndef())
+ return DAG.getUNDEF(NVT);
+
if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// Check if the result type doesn't match the inserted element type. A
// SCALAR_TO_VECTOR may truncate the inserted element and the
@@ -13022,7 +13607,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
return DAG.getNode(Opcode, DL, VT, BV);
}
-SDValue DAGCombiner::createBuildVecShuffle(SDLoc DL, SDNode *N,
+SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask,
SDValue VecIn1, SDValue VecIn2,
unsigned LeftIdx) {
@@ -13300,6 +13885,35 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(VT);
+ // Check if we can express BUILD VECTOR via subvector extract.
+ if (!LegalTypes && (N->getNumOperands() > 1)) {
+ SDValue Op0 = N->getOperand(0);
+ auto checkElem = [&](SDValue Op) -> uint64_t {
+ if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
+ (Op0.getOperand(0) == Op.getOperand(0)))
+ if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return CNode->getZExtValue();
+ return -1;
+ };
+
+ int Offset = checkElem(Op0);
+ for (unsigned i = 0; i < N->getNumOperands(); ++i) {
+ if (Offset + i != checkElem(N->getOperand(i))) {
+ Offset = -1;
+ break;
+ }
+ }
+
+ if ((Offset == 0) &&
+ (Op0.getOperand(0).getValueType() == N->getValueType(0)))
+ return Op0.getOperand(0);
+ if ((Offset != -1) &&
+ ((Offset % N->getValueType(0).getVectorNumElements()) ==
+ 0)) // IDX must be multiple of output size.
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
+ Op0.getOperand(0), Op0.getOperand(1));
+ }
+
if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
return V;
@@ -13491,8 +14105,11 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
return SDValue();
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy,
- VT.getSizeInBits() / SclTy.getSizeInBits());
+ unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
+ if (VNTNumElms < 2)
+ return SDValue();
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
return SDValue();
@@ -13611,15 +14228,19 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
EVT NVT = N->getValueType(0);
SDValue V = N->getOperand(0);
- if (V->getOpcode() == ISD::CONCAT_VECTORS) {
- // Combine:
- // (extract_subvec (concat V1, V2, ...), i)
- // Into:
- // Vi if possible
- // Only operand 0 is checked as 'concat' assumes all inputs of the same
- // type.
- if (V->getOperand(0).getValueType() != NVT)
- return SDValue();
+ // Extract from UNDEF is UNDEF.
+ if (V.isUndef())
+ return DAG.getUNDEF(NVT);
+
+ // Combine:
+ // (extract_subvec (concat V1, V2, ...), i)
+ // Into:
+ // Vi if possible
+ // Only operand 0 is checked as 'concat' assumes all inputs of the same
+ // type.
+ if (V->getOpcode() == ISD::CONCAT_VECTORS &&
+ isa<ConstantSDNode>(N->getOperand(1)) &&
+ V->getOperand(0).getValueType() == NVT) {
unsigned Idx = N->getConstantOperandVal(1);
unsigned NumElems = NVT.getVectorNumElements();
assert((Idx % NumElems) == 0 &&
@@ -13633,19 +14254,16 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
// Handle only simple case where vector being inserted and vector
- // being extracted are of same type, and are half size of larger vectors.
- EVT BigVT = V->getOperand(0).getValueType();
+ // being extracted are of same size.
EVT SmallVT = V->getOperand(1).getValueType();
- if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
+ if (!NVT.bitsEq(SmallVT))
return SDValue();
- // Only handle cases where both indexes are constants with the same type.
+ // Only handle cases where both indexes are constants.
ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
- if (InsIdx && ExtIdx &&
- InsIdx->getValueType(0).getSizeInBits() <= 64 &&
- ExtIdx->getValueType(0).getSizeInBits() <= 64) {
+ if (InsIdx && ExtIdx) {
// Combine:
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
// Into:
@@ -13892,6 +14510,113 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
+// Match shuffles that can be converted to any_vector_extend_in_reg.
+// This is often generated during legalization.
+// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
+// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
+SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool LegalOperations) {
+ EVT VT = SVN->getValueType(0);
+ bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+
+ // TODO Add support for big-endian when we have a test case.
+ if (!VT.isInteger() || IsBigEndian)
+ return SDValue();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ ArrayRef<int> Mask = SVN->getMask();
+ SDValue N0 = SVN->getOperand(0);
+
+ // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
+ auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
+ continue;
+ return false;
+ }
+ return true;
+ };
+
+ // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
+ // power-of-2 extensions as they are the most likely.
+ for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
+ if (!isAnyExtend(Scale))
+ continue;
+
+ EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
+ return DAG.getBitcast(VT,
+ DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
+ }
+
+ return SDValue();
+}
+
+// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
+// each source element of a large type into the lowest elements of a smaller
+// destination type. This is often generated during legalization.
+// If the source node itself was a '*_extend_vector_inreg' node then we should
+// then be able to remove it.
+SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) {
+ EVT VT = SVN->getValueType(0);
+ bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+
+ // TODO Add support for big-endian when we have a test case.
+ if (!VT.isInteger() || IsBigEndian)
+ return SDValue();
+
+ SDValue N0 = SVN->getOperand(0);
+ while (N0.getOpcode() == ISD::BITCAST)
+ N0 = N0.getOperand(0);
+
+ unsigned Opcode = N0.getOpcode();
+ if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
+ Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
+ Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+ ArrayRef<int> Mask = SVN->getMask();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
+
+ // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
+ // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
+ // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
+ auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
+ continue;
+ return false;
+ }
+ return true;
+ };
+
+ // At the moment we just handle the case where we've truncated back to the
+ // same size as before the extension.
+ // TODO: handle more extension/truncation cases as cases arise.
+ if (EltSizeInBits != ExtSrcSizeInBits)
+ return SDValue();
+
+ // Attempt to match a 'truncate_vector_inreg' shuffle, we just search for
+ // power-of-2 truncations as they are the most likely.
+ for (unsigned Scale = 2; Scale < NumElts; Scale *= 2)
+ if (isTruncate(Scale))
+ return DAG.getBitcast(VT, N00);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -13996,6 +14721,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
return S;
+ // Match shuffles that can be converted to any_vector_extend_in_reg.
+ if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
+ return V;
+
+ // Combine "truncate_vector_in_reg" style shuffles.
+ if (SDValue V = combineTruncationShuffle(SVN, DAG))
+ return V;
+
if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
Level < AfterLegalizeVectorOps &&
(N1.isUndef() ||
@@ -14253,6 +14986,16 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
+ // If inserting an UNDEF, just return the original vector.
+ if (N1.isUndef())
+ return N0;
+
+ // If this is an insert of an extracted vector into an undef vector, we can
+ // just use the input to the extract.
+ if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
+ return N1.getOperand(0);
+
// Combine INSERT_SUBVECTORs where we are inserting to the same index.
// INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
// --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
@@ -14262,26 +15005,39 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
N1, N2);
- if (N0.getValueType() != N1.getValueType())
+ if (!isa<ConstantSDNode>(N2))
return SDValue();
+ unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+
+ // Canonicalize insert_subvector dag nodes.
+ // Example:
+ // (insert_subvector (insert_subvector A, Idx0), Idx1)
+ // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
+ if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
+ N1.getValueType() == N0.getOperand(1).getValueType() &&
+ isa<ConstantSDNode>(N0.getOperand(2))) {
+ unsigned OtherIdx = cast<ConstantSDNode>(N0.getOperand(2))->getZExtValue();
+ if (InsIdx < OtherIdx) {
+ // Swap nodes.
+ SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
+ N0.getOperand(0), N1, N2);
+ AddToWorklist(NewOp.getNode());
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
+ VT, NewOp, N0.getOperand(1), N0.getOperand(2));
+ }
+ }
+
// If the input vector is a concatenation, and the insert replaces
- // one of the halves, we can optimize into a single concat_vectors.
- if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 &&
- N2.getOpcode() == ISD::Constant) {
- APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
+ // one of the pieces, we can optimize into a single concat_vectors.
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
+ N0.getOperand(0).getValueType() == N1.getValueType()) {
+ unsigned Factor = N1.getValueType().getVectorNumElements();
- // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
- // (concat_vectors Z, Y)
- if (InsIdx == 0)
- return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N1,
- N0.getOperand(1));
+ SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
+ Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
- // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
- // (concat_vectors X, Z)
- if (InsIdx == VT.getVectorNumElements() / 2)
- return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0.getOperand(0),
- N1);
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
return SDValue();
@@ -15257,7 +16013,7 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
if (Base.getOpcode() == ISD::ADD) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
Base = Base.getOperand(0);
- Offset += C->getZExtValue();
+ Offset += C->getSExtValue();
}
}
@@ -15454,6 +16210,12 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
++Depth;
break;
+ case ISD::CopyFromReg:
+ // Forward past CopyFromReg.
+ Chains.push_back(Chain.getOperand(0));
+ ++Depth;
+ break;
+
default:
// For all other instructions we will just have to take what we can get.
Aliases.push_back(Chain);
@@ -15482,6 +16244,18 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}
+// This function tries to collect a bunch of potentially interesting
+// nodes to improve the chains of, all at once. This might seem
+// redundant, as this function gets called when visiting every store
+// node, so why not let the work be done on each store as it's visited?
+//
+// I believe this is mainly important because MergeConsecutiveStores
+// is unable to deal with merging stores of different sizes, so unless
+// we improve the chains of all the potential candidates up-front
+// before running MergeConsecutiveStores, it might only see some of
+// the nodes that will eventually be candidates, and then not be able
+// to go from a partially-merged state to the desired final
+// fully-merged state.
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
@@ -15517,10 +16291,8 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
if (!Ptr.equalBaseIndex(BasePtr))
break;
- // Find the next memory operand in the chain. If the next operand in the
- // chain is a store then move up and continue the scan with the next
- // memory operand. If the next operand is a load save it and use alias
- // information to check if it interferes with anything.
+ // Walk up the chain to find the next store node, ignoring any
+ // intermediate loads. Any other kind of node will halt the loop.
SDNode *NextInChain = Index->getChain().getNode();
while (true) {
if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
@@ -15539,9 +16311,14 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
Index = nullptr;
break;
}
- }
+ } // end while
}
+ // At this point, ChainedStores lists all of the Store nodes
+ // reachable by iterating up through chain nodes matching the above
+ // conditions. For each such store identified, try to find an
+ // earlier chain to attach the store to which won't violate the
+ // required ordering.
bool MadeChangeToSt = false;
SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;