Diffstat (limited to 'lib/CodeGen/SelectionDAG')
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3533
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 245
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 94
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 74
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.h | 3
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 353
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 56
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 139
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 268
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 125
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 5
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 265
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 225
-rw-r--r--  lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 47
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 51
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 201
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 36
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 12
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 844
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp | 64
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 923
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 46
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 80
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 644
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 918
28 files changed, 5961 insertions, 3298 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3218dce8f575..7a99687757f8 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -36,7 +36,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
@@ -60,6 +59,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -122,7 +122,7 @@ namespace {
bool LegalTypes = false;
bool ForCodeSize;
- /// \brief Worklist of all of the nodes that need to be simplified.
+ /// Worklist of all of the nodes that need to be simplified.
///
/// This must behave as a stack -- new nodes to process are pushed onto the
/// back and when processing we pop off of the back.
@@ -131,14 +131,14 @@ namespace {
/// due to nodes being deleted from the underlying DAG.
SmallVector<SDNode *, 64> Worklist;
- /// \brief Mapping from an SDNode to its position on the worklist.
+ /// Mapping from an SDNode to its position on the worklist.
///
/// This is used to find and remove nodes from the worklist (by nulling
/// them) when they are deleted from the underlying DAG. It relies on
/// stable indices of nodes within the worklist.
DenseMap<SDNode *, unsigned> WorklistMap;
- /// \brief Set of nodes which have been combined (at least once).
+ /// Set of nodes which have been combined (at least once).
///
/// This is used to allow us to reliably add any operands of a DAG node
/// which have not yet been combined to the worklist.
@@ -232,14 +232,25 @@ namespace {
return SimplifyDemandedBits(Op, Demanded);
}
+ /// Check the specified vector node value to see if it can be simplified or
+ /// if things it uses can be simplified as it only uses some of the
+ /// elements. If so, return true.
+ bool SimplifyDemandedVectorElts(SDValue Op) {
+ unsigned NumElts = Op.getValueType().getVectorNumElements();
+ APInt Demanded = APInt::getAllOnesValue(NumElts);
+ return SimplifyDemandedVectorElts(Op, Demanded);
+ }
+
bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
+ bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
+ bool AssumeSingleUse = false);
bool CombineToPreIndexedLoadStore(SDNode *N);
bool CombineToPostIndexedLoadStore(SDNode *N);
SDValue SplitIndexingFromLoad(LoadSDNode *LD);
bool SliceUpLoad(SDNode *N);
- /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
+ /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
/// load.
///
/// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
@@ -258,10 +269,6 @@ namespace {
SDValue PromoteExtend(SDValue Op);
bool PromoteLoad(SDValue Op);
- void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
- SDValue ExtLoad, const SDLoc &DL,
- ISD::NodeType ExtType);
-
/// Call the node-specific routine that knows how to fold each
/// particular type of node. If that doesn't do anything, try the
/// target-specific DAG combines.
@@ -292,7 +299,9 @@ namespace {
SDValue visitMUL(SDNode *N);
SDValue useDivRem(SDNode *N);
SDValue visitSDIV(SDNode *N);
+ SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitUDIV(SDNode *N);
+ SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitREM(SDNode *N);
SDValue visitMULHU(SDNode *N);
SDValue visitMULHS(SDNode *N);
@@ -302,9 +311,9 @@ namespace {
SDValue visitUMULO(SDNode *N);
SDValue visitIMINMAX(SDNode *N);
SDValue visitAND(SDNode *N);
- SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
+ SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitOR(SDNode *N);
- SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
+ SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitXOR(SDNode *N);
SDValue SimplifyVBinOp(SDNode *N);
SDValue visitSHL(SDNode *N);
@@ -323,7 +332,6 @@ namespace {
SDValue visitVSELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
SDValue visitSETCC(SDNode *N);
- SDValue visitSETCCE(SDNode *N);
SDValue visitSETCCCARRY(SDNode *N);
SDValue visitSIGN_EXTEND(SDNode *N);
SDValue visitZERO_EXTEND(SDNode *N);
@@ -385,8 +393,8 @@ namespace {
SDValue visitFMULForFMADistributiveCombine(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
- SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
- SDValue RHS);
+ SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
+ SDValue N1);
SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
@@ -403,8 +411,11 @@ namespace {
SDValue N2, SDValue N3, ISD::CondCode CC);
SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
const SDLoc &DL);
+ SDValue unfoldMaskedMerge(SDNode *N);
+ SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
- const SDLoc &DL, bool foldBooleans = true);
+ const SDLoc &DL, bool foldBooleans);
+ SDValue rebuildSetCC(SDValue N);
bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
SDValue &CC) const;
@@ -414,20 +425,21 @@ namespace {
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
SDValue CombineExtLoad(SDNode *N);
+ SDValue CombineZExtLogicopShiftLoad(SDNode *N);
SDValue combineRepeatedFPDivisors(SDNode *N);
SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
- SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
+ SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
- SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
+ SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
SDNodeFlags Flags, bool Reciprocal);
- SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
+ SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
SDNodeFlags Flags, bool Reciprocal);
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
@@ -442,13 +454,14 @@ namespace {
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue splitMergedValStore(StoreSDNode *ST);
SDValue TransformFPLoadStorePair(SDNode *N);
+ SDValue convertBuildVecZextToZext(SDNode *N);
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
SDValue reduceBuildVecToShuffle(SDNode *N);
SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask, SDValue VecIn1,
SDValue VecIn2, unsigned LeftIdx);
- SDValue matchVSelectOpSizesWithSetCC(SDNode *N);
+ SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
@@ -500,15 +513,15 @@ namespace {
bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
EVT LoadResultTy, EVT &ExtVT);
- /// Helper function to calculate whether the given Load can have its
+ /// Helper function to calculate whether the given Load/Store can have its
/// width reduced to ExtVT.
- bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
- EVT &ExtVT, unsigned ShAmt = 0);
+ bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
+ EVT &MemVT, unsigned ShAmt = 0);
/// Used by BackwardsPropagateMask to find suitable loads.
bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
- SmallPtrSetImpl<SDNode*> &NodeWithConsts,
- ConstantSDNode *Mask, SDNode *&UncombinedNode);
+ SmallPtrSetImpl<SDNode*> &NodesWithConsts,
+ ConstantSDNode *Mask, SDNode *&NodeToMask);
/// Attempt to propagate a given AND node back to load leaves so that they
/// can be combined into narrow loads.
bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
@@ -530,23 +543,28 @@ namespace {
/// This is a helper function for MergeConsecutiveStores. Stores
/// that potentially may be merged with St are placed in
- /// StoreNodes.
+ /// StoreNodes. RootNode is a chain predecessor to all store
+ /// candidates.
void getStoreMergeCandidates(StoreSDNode *St,
- SmallVectorImpl<MemOpLink> &StoreNodes);
+ SmallVectorImpl<MemOpLink> &StoreNodes,
+ SDNode *&Root);
/// Helper function for MergeConsecutiveStores. Checks if
/// candidate stores have indirect dependency through their
- /// operands. \return True if safe to merge.
+ /// operands. RootNode is the predecessor to all stores calculated
+ /// by getStoreMergeCandidates and is used to prune the dependency check.
+ /// \return True if safe to merge.
bool checkMergeStoreCandidatesForDependencies(
- SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
+ SDNode *RootNode);
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return number of stores that were merged into a merged store (the
/// affected nodes are stored as a prefix in \p StoreNodes).
- bool MergeConsecutiveStores(StoreSDNode *N);
+ bool MergeConsecutiveStores(StoreSDNode *St);
- /// \brief Try to transform a truncation where C is a constant:
+ /// Try to transform a truncation where C is a constant:
/// (trunc (and X, C)) -> (and (trunc X), (trunc C))
///
/// \p N needs to be a truncation and its first operand an AND. Other
@@ -554,6 +572,16 @@ namespace {
/// single-use) and if missed an empty SDValue is returned.
SDValue distributeTruncateThroughAnd(SDNode *N);
+ /// Helper function to determine whether the target supports an operation
+ /// given by \p Opcode for type \p VT, that is, whether the operation
+ /// is legal or custom before legalizing operations, and whether it is
+ /// legal (but not custom) after legalization.
+ bool hasOperation(unsigned Opcode, EVT VT) {
+ if (LegalOperations)
+ return TLI.isOperationLegal(Opcode, VT);
+ return TLI.isOperationLegalOrCustom(Opcode, VT);
+ }
+
public:
/// Runs the dag combiner on all nodes in the work list
void Run(CombineLevel AtLevel);
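
The hasOperation helper above encodes one policy decision: Custom lowering counts as available before operation legalization but not after. A minimal standalone sketch of that policy, using mocked-up types (MockTLI and Action are illustrative stand-ins, not the real TargetLowering API, which keys actions on (Opcode, VT) pairs):

    #include <cassert>

    enum class Action { Legal, Custom, Expand };

    struct MockTLI {
      Action ActionForOp; // what the target reports for some (Opcode, VT)
      bool isOperationLegal() const { return ActionForOp == Action::Legal; }
      bool isOperationLegalOrCustom() const {
        return ActionForOp == Action::Legal || ActionForOp == Action::Custom;
      }
    };

    // Mirrors DAGCombiner::hasOperation: Custom counts only pre-legalization.
    static bool hasOperation(const MockTLI &TLI, bool LegalOperations) {
      return LegalOperations ? TLI.isOperationLegal()
                             : TLI.isOperationLegalOrCustom();
    }

    int main() {
      MockTLI Custom{Action::Custom};
      assert(hasOperation(Custom, /*LegalOperations=*/false)); // usable early
      assert(!hasOperation(Custom, /*LegalOperations=*/true)); // not after
    }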
@@ -564,11 +592,7 @@ namespace {
/// legalization these can be huge.
EVT getShiftAmountTy(EVT LHSTy) {
assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
- if (LHSTy.isVector())
- return LHSTy;
- auto &DL = DAG.getDataLayout();
- return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
- : TLI.getPointerTy(DL);
+ return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
}
/// This method returns true if we are running before type legalization or
@@ -582,6 +606,10 @@ namespace {
EVT getSetCCResultType(EVT VT) const {
return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
}
+
+ void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
+ SDValue OrigLoad, SDValue ExtLoad,
+ ISD::NodeType ExtType);
};
/// This class is a DAGUpdateListener that removes any deleted
@@ -657,8 +685,13 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
// fneg is removable even if it has multiple uses.
if (Op.getOpcode() == ISD::FNEG) return 2;
- // Don't allow anything with multiple uses.
- if (!Op.hasOneUse()) return 0;
+ // Don't allow anything with multiple uses unless we know it is free.
+ EVT VT = Op.getValueType();
+ const SDNodeFlags Flags = Op->getFlags();
+ if (!Op.hasOneUse())
+ if (!(Op.getOpcode() == ISD::FP_EXTEND &&
+ TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
+ return 0;
// Don't recurse exponentially.
if (Depth > 6) return 0;
@@ -671,17 +704,15 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
// Don't invert constant FP values after legalization unless the target says
// the negated constant is legal.
- EVT VT = Op.getValueType();
return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
}
case ISD::FADD:
- // FIXME: determine better conditions for this xform.
- if (!Options->UnsafeFPMath) return 0;
+ if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
+ return 0;
// After operation legalization, it might not be legal to create new FSUBs.
- if (LegalOperations &&
- !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType()))
+ if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
return 0;
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
@@ -694,7 +725,7 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
if (!Options->NoSignedZerosFPMath &&
- !Op.getNode()->getFlags().hasNoSignedZeros())
+ !Flags.hasNoSignedZeros())
return 0;
// fold (fneg (fsub A, B)) -> (fsub B, A)
@@ -702,8 +733,6 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
case ISD::FMUL:
case ISD::FDIV:
- if (Options->HonorSignDependentRoundingFPMath()) return 0;
-
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
Options, Depth + 1))
@@ -727,9 +756,6 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
// fneg is removable even if it has multiple uses.
if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
- // Don't allow anything with multiple uses.
- assert(Op.hasOneUse() && "Unknown reuse!");
-
assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
const SDNodeFlags Flags = Op.getNode()->getFlags();
@@ -742,8 +768,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
}
case ISD::FADD:
- // FIXME: determine better conditions for this xform.
- assert(Options.UnsafeFPMath);
+ assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
@@ -769,8 +794,6 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
case ISD::FMUL:
case ISD::FDIV:
- assert(!Options.HonorSignDependentRoundingFPMath());
-
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
DAG.getTargetLoweringInfo(), &Options, Depth+1))
@@ -846,7 +869,13 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const {
return false;
}
-// \brief Returns the SDNode if it is a constant float BuildVector
+static SDValue peekThroughBitcast(SDValue V) {
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+ return V;
+}
+
+// Returns the SDNode if it is a constant float BuildVector
// or constant float.
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
if (isa<ConstantFPSDNode>(N))
@@ -880,6 +909,7 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
// constant null integer (with no undefs).
// Build vector implicit truncation is not an issue for null values.
static bool isNullConstantOrNullSplatConstant(SDValue N) {
+ // TODO: may want to use peekThroughBitcast() here.
if (ConstantSDNode *Splat = isConstOrConstSplat(N))
return Splat->isNullValue();
return false;
@@ -889,6 +919,7 @@ static bool isNullConstantOrNullSplatConstant(SDValue N) {
// constant integer of one (with no undefs).
// Do not permit build vector implicit truncation.
static bool isOneConstantOrOneSplatConstant(SDValue N) {
+ // TODO: may want to use peekThroughBitcast() here.
unsigned BitWidth = N.getScalarValueSizeInBits();
if (ConstantSDNode *Splat = isConstOrConstSplat(N))
return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
@@ -899,6 +930,7 @@ static bool isOneConstantOrOneSplatConstant(SDValue N) {
// constant integer of all ones (with no undefs).
// Do not permit build vector implicit truncation.
static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
+ N = peekThroughBitcast(N);
unsigned BitWidth = N.getScalarValueSizeInBits();
if (ConstantSDNode *Splat = isConstOrConstSplat(N))
return Splat->isAllOnesValue() &&
@@ -913,56 +945,6 @@ static bool isAnyConstantBuildVector(const SDNode *N) {
ISD::isBuildVectorOfConstantFPSDNodes(N);
}
-// Attempt to match a unary predicate against a scalar/splat constant or
-// every element of a constant BUILD_VECTOR.
-static bool matchUnaryPredicate(SDValue Op,
- std::function<bool(ConstantSDNode *)> Match) {
- if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
- return Match(Cst);
-
- if (ISD::BUILD_VECTOR != Op.getOpcode())
- return false;
-
- EVT SVT = Op.getValueType().getScalarType();
- for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
- auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
- if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
- return false;
- }
- return true;
-}
-
-// Attempt to match a binary predicate against a pair of scalar/splat constants
-// or every element of a pair of constant BUILD_VECTORs.
-static bool matchBinaryPredicate(
- SDValue LHS, SDValue RHS,
- std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) {
- if (LHS.getValueType() != RHS.getValueType())
- return false;
-
- if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
- if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
- return Match(LHSCst, RHSCst);
-
- if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
- ISD::BUILD_VECTOR != RHS.getOpcode())
- return false;
-
- EVT SVT = LHS.getValueType().getScalarType();
- for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
- auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i));
- auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
- if (!LHSCst || !RHSCst)
- return false;
- if (LHSCst->getValueType(0) != SVT ||
- LHSCst->getValueType(0) != RHSCst->getValueType(0))
- return false;
- if (!Match(LHSCst, RHSCst))
- return false;
- }
- return true;
-}
-
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1) {
EVT VT = N0.getValueType();
@@ -1013,11 +995,9 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo) {
assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
++NodesCombined;
- DEBUG(dbgs() << "\nReplacing.1 ";
- N->dump(&DAG);
- dbgs() << "\nWith: ";
- To[0].getNode()->dump(&DAG);
- dbgs() << " and " << NumTo-1 << " other values\n");
+ LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
+ To[0].getNode()->dump(&DAG);
+ dbgs() << " and " << NumTo - 1 << " other values\n");
for (unsigned i = 0, e = NumTo; i != e; ++i)
assert((!To[i].getNode() ||
N->getValueType(i) == To[i].getValueType()) &&
@@ -1074,11 +1054,33 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
// Replace the old value with the new one.
++NodesCombined;
- DEBUG(dbgs() << "\nReplacing.2 ";
- TLO.Old.getNode()->dump(&DAG);
- dbgs() << "\nWith: ";
- TLO.New.getNode()->dump(&DAG);
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
+ dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
+ dbgs() << '\n');
+
+ CommitTargetLoweringOpt(TLO);
+ return true;
+}
+
+/// Check the specified vector node value to see if it can be simplified or
+/// if things it uses can be simplified as it only uses some of the elements.
+/// If so, return true.
+bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
+ bool AssumeSingleUse) {
+ TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
+ APInt KnownUndef, KnownZero;
+ if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
+ 0, AssumeSingleUse))
+ return false;
+
+ // Revisit the node.
+ AddToWorklist(Op.getNode());
+
+ // Replace the old value with the new one.
+ ++NodesCombined;
+ LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
+ dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
+ dbgs() << '\n');
CommitTargetLoweringOpt(TLO);
return true;
@@ -1089,11 +1091,8 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
EVT VT = Load->getValueType(0);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
- DEBUG(dbgs() << "\nReplacing.9 ";
- Load->dump(&DAG);
- dbgs() << "\nWith: ";
- Trunc.getNode()->dump(&DAG);
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
+ Trunc.getNode()->dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
@@ -1107,10 +1106,8 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
if (ISD::isUNINDEXEDLoad(Op.getNode())) {
LoadSDNode *LD = cast<LoadSDNode>(Op);
EVT MemVT = LD->getMemoryVT();
- ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
- ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
- : ISD::EXTLOAD)
- : LD->getExtensionType();
+ ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
+ : LD->getExtensionType();
Replace = true;
return DAG.getExtLoad(ExtType, DL, PVT,
LD->getChain(), LD->getBasePtr(),
@@ -1194,7 +1191,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
- DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
bool Replace0 = false;
SDValue N0 = Op.getOperand(0);
@@ -1259,7 +1256,7 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
- DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
bool Replace = false;
SDValue N0 = Op.getOperand(0);
@@ -1311,8 +1308,7 @@ SDValue DAGCombiner::PromoteExtend(SDValue Op) {
// fold (aext (aext x)) -> (aext x)
// fold (aext (zext x)) -> (zext x)
// fold (aext (sext x)) -> (sext x)
- DEBUG(dbgs() << "\nPromoting ";
- Op.getNode()->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
}
return SDValue();
@@ -1345,20 +1341,15 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
SDNode *N = Op.getNode();
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT MemVT = LD->getMemoryVT();
- ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
- ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
- : ISD::EXTLOAD)
- : LD->getExtensionType();
+ ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
+ : LD->getExtensionType();
SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
LD->getChain(), LD->getBasePtr(),
MemVT, LD->getMemOperand());
SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
- DEBUG(dbgs() << "\nPromoting ";
- N->dump(&DAG);
- dbgs() << "\nTo: ";
- Result.getNode()->dump(&DAG);
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
+ Result.getNode()->dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
@@ -1369,7 +1360,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
return false;
}
-/// \brief Recursively delete a node which has no uses and any operands for
+/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
@@ -1453,7 +1444,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
continue;
}
- DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
// Add any operands of the new node which have not yet been combined to the
// worklist as well. Because the worklist uniques things already, this
@@ -1481,8 +1472,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
RV.getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
- DEBUG(dbgs() << " ... into: ";
- RV.getNode()->dump(&DAG));
+ LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
if (N->getNumValues() == RV.getNode()->getNumValues())
DAG.ReplaceAllUsesWith(N, RV.getNode());
@@ -1558,7 +1548,6 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::VSELECT: return visitVSELECT(N);
case ISD::SELECT_CC: return visitSELECT_CC(N);
case ISD::SETCC: return visitSETCC(N);
- case ISD::SETCCE: return visitSETCCE(N);
case ISD::SETCCCARRY: return visitSETCCCARRY(N);
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
@@ -1708,6 +1697,10 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
return N->getOperand(1);
}
+ // Don't simplify token factors if optnone.
+ if (OptLevel == CodeGenOpt::None)
+ return SDValue();
+
SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
SmallPtrSet<SDNode*, 16> SeenOps;
@@ -1893,16 +1886,16 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
"Unexpected binary operator");
- // Bail out if any constants are opaque because we can't constant fold those.
- SDValue C1 = BO->getOperand(1);
- if (!isConstantOrConstantVector(C1, true) &&
- !isConstantFPBuildVectorOrConstantFP(C1))
- return SDValue();
-
// Don't do this unless the old select is going away. We want to eliminate the
// binary operator, not replace a binop with a select.
// TODO: Handle ISD::SELECT_CC.
+ unsigned SelOpNo = 0;
SDValue Sel = BO->getOperand(0);
+ if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
+ SelOpNo = 1;
+ Sel = BO->getOperand(1);
+ }
+
if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
return SDValue();
@@ -1916,19 +1909,48 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
!isConstantFPBuildVectorOrConstantFP(CF))
return SDValue();
+ // Bail out if any constants are opaque because we can't constant fold those.
+ // The exception is "and" and "or" with either 0 or -1 in which case we can
+ // propagate non constant operands into select. I.e.:
+ // and (select Cond, 0, -1), X --> select Cond, 0, X
+ // or X, (select Cond, -1, 0) --> select Cond, -1, X
+ bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
+ (isNullConstantOrNullSplatConstant(CT) ||
+ isAllOnesConstantOrAllOnesSplatConstant(CT)) &&
+ (isNullConstantOrNullSplatConstant(CF) ||
+ isAllOnesConstantOrAllOnesSplatConstant(CF));
+
+ SDValue CBO = BO->getOperand(SelOpNo ^ 1);
+ if (!CanFoldNonConst &&
+ !isConstantOrConstantVector(CBO, true) &&
+ !isConstantFPBuildVectorOrConstantFP(CBO))
+ return SDValue();
+
+ EVT VT = Sel.getValueType();
+
+ // In the case of a shift, the value and shift amount may have different VTs.
+ // For instance, on x86 the shift amount is i8 regardless of the LHS type.
+ // Bail out if we have swapped operands and the value types do not match.
+ // NB: x86 is fine if the operands are not swapped and the shift amount VT is
+ // no bigger than the shifted value's VT.
+ // TODO: it is possible to check for a shift operation, correct the VTs, and
+ // still perform the optimization on x86 if needed.
+ if (SelOpNo && VT != CBO.getValueType())
+ return SDValue();
+
// We have a select-of-constants followed by a binary operator with a
// constant. Eliminate the binop by pulling the constant math into the select.
- // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
- EVT VT = Sel.getValueType();
+ // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
SDLoc DL(Sel);
- SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
- if (!NewCT.isUndef() &&
+ SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
+ : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
+ if (!CanFoldNonConst && !NewCT.isUndef() &&
!isConstantOrConstantVector(NewCT, true) &&
!isConstantFPBuildVectorOrConstantFP(NewCT))
return SDValue();
- SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
- if (!NewCF.isUndef() &&
+ SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
+ : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
+ if (!CanFoldNonConst && !NewCF.isUndef() &&
!isConstantOrConstantVector(NewCF, true) &&
!isConstantFPBuildVectorOrConstantFP(NewCF))
return SDValue();
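
The two identities that justify the new CanFoldNonConst path can be checked exhaustively over small inputs with plain scalar arithmetic; a minimal standalone sketch (not DAG code):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int c = 0; c <= 1; ++c) {
        for (uint32_t x = 0; x < 256; ++x) {
          uint32_t sel01 = c ? 0u : ~0u; // select Cond, 0, -1
          uint32_t sel10 = c ? ~0u : 0u; // select Cond, -1, 0
          // and (select Cond, 0, -1), X --> select Cond, 0, X
          assert((sel01 & x) == (c ? 0u : x));
          // or X, (select Cond, -1, 0) --> select Cond, -1, X
          assert((x | sel10) == (c ? ~0u : x));
        }
      }
    }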
@@ -1936,6 +1958,84 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
}
+static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
+ assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+ "Expecting add or sub");
+
+ // Match a constant operand and a zext operand for the math instruction:
+ // add Z, C
+ // sub C, Z
+ bool IsAdd = N->getOpcode() == ISD::ADD;
+ SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
+ SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
+ auto *CN = dyn_cast<ConstantSDNode>(C);
+ if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
+ return SDValue();
+
+ // Match the zext operand as a setcc of a boolean.
+ if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
+ Z.getOperand(0).getValueType() != MVT::i1)
+ return SDValue();
+
+ // Match the compare as: setcc (X & 1), 0, eq.
+ SDValue SetCC = Z.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
+ if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
+ SetCC.getOperand(0).getOpcode() != ISD::AND ||
+ !isOneConstant(SetCC.getOperand(0).getOperand(1)))
+ return SDValue();
+
+ // We are adding/subtracting a constant and an inverted low bit. Turn that
+ // into a subtract/add of the low bit with incremented/decremented constant:
+ // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
+ // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
+ EVT VT = C.getValueType();
+ SDLoc DL(N);
+ SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
+ SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
+ DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
+ return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
+}
+
+/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
+/// a shift and add with a different constant.
+static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
+ assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+ "Expecting add or sub");
+
+ // We need a constant operand for the add/sub, and the other operand is a
+ // logical shift right: add (srl), C or sub C, (srl).
+ bool IsAdd = N->getOpcode() == ISD::ADD;
+ SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
+ SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
+ ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
+ if (!C || ShiftOp.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ // The shift must be of a 'not' value.
+ // TODO: Use isBitwiseNot() if it works with vectors.
+ SDValue Not = ShiftOp.getOperand(0);
+ if (!Not.hasOneUse() || Not.getOpcode() != ISD::XOR ||
+ !isAllOnesConstantOrAllOnesSplatConstant(Not.getOperand(1)))
+ return SDValue();
+
+ // The shift must be moving the sign bit to the least-significant-bit.
+ EVT VT = ShiftOp.getValueType();
+ SDValue ShAmt = ShiftOp.getOperand(1);
+ ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
+ if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
+ return SDValue();
+
+ // Eliminate the 'not' by adjusting the shift and add/sub constant:
+ // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
+ // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
+ SDLoc DL(N);
+ auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
+ SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
+ APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
+ return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
+}
+
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
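
Both new helpers above rest on small two's-complement identities. A standalone scalar check (assumes 32-bit int and, for the signed >>, the arithmetic right shift that mainstream compilers provide):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int64_t xi = -1000; xi <= 1000; ++xi) {
        int32_t X = (int32_t)xi;
        for (int32_t C = -3; C <= 3; ++C) {
          // foldAddSubBoolOfMaskedVal:
          //   add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
          int32_t Zext = ((X & 1) == 0) ? 1 : 0;
          assert(Zext + C == (C + 1) - (X & 1));

          // foldAddSubOfSignBit:
          //   add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
          uint32_t Srl = (uint32_t)~X >> 31; // logical shift of the 'not'
          int32_t Sra = X >> 31;             // sign splat: 0 or -1
          assert((int32_t)Srl + C == Sra + (C + 1));
        }
      }
    }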
@@ -2067,6 +2167,12 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
}
+ if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
+ return V;
+
+ if (SDValue V = foldAddSubOfSignBit(N, DAG))
+ return V;
+
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -2075,6 +2181,11 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
DAG.haveNoCommonBitsSet(N0, N1))
return DAG.getNode(ISD::OR, DL, VT, N0, N1);
+ // fold (add (xor a, -1), 1) -> (sub 0, a)
+ if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1))
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ N0.getOperand(0));
+
if (SDValue Combined = visitADDLike(N0, N1, N))
return Combined;
@@ -2210,6 +2321,38 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
return SDValue();
}
+static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
+ SelectionDAG &DAG, const TargetLowering &TLI) {
+ SDValue Cst;
+ switch (TLI.getBooleanContents(VT)) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ case TargetLowering::UndefinedBooleanContent:
+ Cst = DAG.getConstant(1, DL, VT);
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ Cst = DAG.getConstant(-1, DL, VT);
+ break;
+ }
+
+ return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
+}
+
+static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
+ if (V.getOpcode() != ISD::XOR) return false;
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1));
+ if (!Const) return false;
+
+ switch(TLI.getBooleanContents(VT)) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ return Const->isOne();
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return Const->isAllOnesValue();
+ case TargetLowering::UndefinedBooleanContent:
+ return (Const->getAPIntValue() & 0x01) == 1;
+ }
+ llvm_unreachable("Unsupported boolean content");
+}
+
SDValue DAGCombiner::visitUADDO(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
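
flipBoolean inverts a boolean by XOR'ing with whatever constant the target's boolean encoding uses for 'true'. A minimal sketch of the two common encodings (the enum is illustrative, not the real TargetLowering interface):

    #include <cassert>
    #include <cstdint>

    enum BooleanContent { ZeroOrOne, ZeroOrNegativeOne };

    // Mirrors flipBoolean: XOR with the encoding's representation of 'true'.
    static int32_t flipBoolean(int32_t V, BooleanContent BC) {
      return V ^ (BC == ZeroOrOne ? 1 : -1);
    }

    int main() {
      // 0/1 booleans flip with XOR 1.
      assert(flipBoolean(0, ZeroOrOne) == 1 && flipBoolean(1, ZeroOrOne) == 0);
      // 0/-1 booleans flip with XOR -1.
      assert(flipBoolean(0, ZeroOrNegativeOne) == -1 &&
             flipBoolean(-1, ZeroOrNegativeOne) == 0);
    }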
@@ -2240,6 +2383,15 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) {
return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
DAG.getConstant(0, DL, CarryVT));
+ // fold (uaddo (xor a, -1), 1) -> (usubo 0, a) and flip carry.
+ if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1)) {
+ SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
+ DAG.getConstant(0, DL, VT),
+ N0.getOperand(0));
+ return CombineTo(N, Sub,
+ flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
+ }
+
if (SDValue Combined = visitUADDOLike(N0, N1, N))
return Combined;
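
The uaddo fold relies on (~a) + 1 being the two's-complement negation 0 - a, with the carry-out of the add being the logical inverse of the borrow of the subtract. A standalone check using the GCC/Clang overflow builtins (assumed available on the host compiler):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint64_t ai = 0; ai < 1000; ++ai) {
        uint32_t a = (uint32_t)ai, r1, r2;
        // uaddo (xor a, -1), 1
        bool Carry = __builtin_add_overflow(~a, 1u, &r1);
        // usubo 0, a
        bool Borrow = __builtin_sub_overflow(0u, a, &r2);
        assert(r1 == r2);         // same value: 0 - a
        assert(Carry == !Borrow); // carry is the flipped borrow
      }
    }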
@@ -2303,13 +2455,17 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
// fold (addcarry x, y, false) -> (uaddo x, y)
- if (isNullConstant(CarryIn))
- return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
+ if (isNullConstant(CarryIn)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
+ return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
+ }
+
+ EVT CarryVT = CarryIn.getValueType();
// fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
if (isNullConstant(N0) && isNullConstant(N1)) {
EVT VT = N0.getValueType();
- EVT CarryVT = CarryIn.getValueType();
SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
AddToWorklist(CarryExt.getNode());
return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
@@ -2317,6 +2473,16 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
DAG.getConstant(0, DL, CarryVT));
}
+ // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
+ if (isBitwiseNot(N0) && isNullConstant(N1) &&
+ isBooleanFlip(CarryIn, CarryVT, TLI)) {
+ SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
+ DAG.getConstant(0, DL, N0.getValueType()),
+ N0.getOperand(0), CarryIn.getOperand(0));
+ return CombineTo(N, Sub,
+ flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
+ }
+
if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
return Combined;
@@ -2458,6 +2624,11 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (isAllOnesConstantOrAllOnesSplatConstant(N0))
return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
+ // fold (A - (0-B)) -> A+B
+ if (N1.getOpcode() == ISD::SUB &&
+ isNullConstantOrNullSplatConstant(N1.getOperand(0)))
+ return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
+
// fold A-(A-B) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
return N1.getOperand(1);
@@ -2500,12 +2671,50 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
N0.getOperand(1).getOperand(0));
+ // fold (X - (-Y * Z)) -> (X + (Y * Z))
+ if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
+ if (N1.getOperand(0).getOpcode() == ISD::SUB &&
+ isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) {
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
+ N1.getOperand(0).getOperand(1),
+ N1.getOperand(1));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
+ }
+ if (N1.getOperand(1).getOpcode() == ISD::SUB &&
+ isNullConstantOrNullSplatConstant(N1.getOperand(1).getOperand(0))) {
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
+ N1.getOperand(0),
+ N1.getOperand(1).getOperand(1));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
+ }
+ }
+
// If either operand of a sub is undef, the result is undef
if (N0.isUndef())
return N0;
if (N1.isUndef())
return N1;
+ if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
+ return V;
+
+ if (SDValue V = foldAddSubOfSignBit(N, DAG))
+ return V;
+
+ // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
+ if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+ if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
+ SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
+ SDValue S0 = N1.getOperand(0);
+ if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
+ if (C->getAPIntValue() == (OpSizeInBits - 1))
+ return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
+ }
+ }
+ }
+
// If the relocation model supports it, consider symbol offsets.
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
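
The new ABS match recognizes the classic branchless absolute-value sequence. A standalone check of the identity (assumes arithmetic right shift for signed values, as mainstream compilers provide):

    #include <cassert>
    #include <cstdint>
    #include <cstdlib>

    int main() {
      for (int64_t xi = -100000; xi <= 100000; ++xi) {
        int32_t X = (int32_t)xi;
        int32_t Y = X >> 31;                  // sra X, size(X)-1: 0 or -1
        assert(((X ^ Y) - Y) == std::abs(X)); // sub (xor X, Y), Y == abs X
      }
    }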
@@ -2612,8 +2821,11 @@ SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
SDValue CarryIn = N->getOperand(2);
// fold (subcarry x, y, false) -> (usubo x, y)
- if (isNullConstant(CarryIn))
- return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
+ if (isNullConstant(CarryIn)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
+ return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
+ }
return SDValue();
}
@@ -2689,11 +2901,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
(!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
SDLoc DL(N);
SDValue LogBase2 = BuildLogBase2(N1, DL);
- AddToWorklist(LogBase2.getNode());
-
EVT ShiftVT = getShiftAmountTy(N0.getValueType());
SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
- AddToWorklist(Trunc.getNode());
return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
}
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
@@ -2816,9 +3025,10 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {
SDValue Op1 = Node->getOperand(1);
SDValue combined;
for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
- UE = Op0.getNode()->use_end(); UI != UE;) {
- SDNode *User = *UI++;
- if (User == Node || User->use_empty())
+ UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
+ User->use_empty())
continue;
// Convert the other matching node(s), too;
// otherwise, the DIVREM may get target-legalized into something
@@ -2868,6 +3078,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(VT);
// fold vector ops
if (VT.isVector())
@@ -2887,6 +3098,11 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// fold (sdiv X, -1) -> 0-X
if (N1C && N1C->isAllOnesValue())
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
+ // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
+ if (N1C && N1C->getAPIntValue().isMinSignedValue())
+ return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
+ DAG.getConstant(1, DL, VT),
+ DAG.getConstant(0, DL, VT));
if (SDValue V = simplifyDivRem(N, DAG))
return V;
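
The MIN_SIGNED fold works because every value other than INT_MIN has magnitude strictly smaller than |INT_MIN|, so the truncating quotient is 0, while INT_MIN / INT_MIN is 1. A quick standalone check:

    #include <cassert>
    #include <climits>

    int main() {
      int vals[] = {INT_MIN, INT_MIN + 1, -1, 0, 1, INT_MAX};
      for (int X : vals)
        assert(X / INT_MIN == (X == INT_MIN ? 1 : 0));
    }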
@@ -2899,45 +3115,90 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
+ if (SDValue V = visitSDIVLike(N0, N1, N))
+ return V;
+
+ // sdiv, srem -> sdivrem
+ // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
+ // true. Otherwise, we break the simplification logic in visitREM().
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(VT);
+ unsigned BitWidth = VT.getScalarSizeInBits();
+
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
+ // Helper for determining whether a value is a power-2 constant scalar or a
+ // vector of such elements.
+ auto IsPowerOfTwo = [](ConstantSDNode *C) {
+ if (C->isNullValue() || C->isOpaque())
+ return false;
+ if (C->getAPIntValue().isPowerOf2())
+ return true;
+ if ((-C->getAPIntValue()).isPowerOf2())
+ return true;
+ return false;
+ };
+
// fold (sdiv X, pow2) -> simple ops after legalize
// FIXME: We check for the exact bit here because the generic lowering gives
// better results in that case. The target-specific lowering should learn how
// to handle exact sdivs efficiently.
- if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
- !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() ||
- (-N1C->getAPIntValue()).isPowerOf2())) {
+ if (!N->getFlags().hasExact() &&
+ ISD::matchUnaryPredicate(N1C ? SDValue(N1C, 0) : N1, IsPowerOfTwo)) {
// Target-specific implementation of sdiv x, pow2.
if (SDValue Res = BuildSDIVPow2(N))
return Res;
- unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
+ // Create constants that are functions of the shift amount value.
+ EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
+ SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
+ SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
+ C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
+ SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
+ if (!isConstantOrConstantVector(Inexact))
+ return SDValue();
// Splat the sign bit into the register
- SDValue SGN =
- DAG.getNode(ISD::SRA, DL, VT, N0,
- DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
- getShiftAmountTy(N0.getValueType())));
- AddToWorklist(SGN.getNode());
+ SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
+ DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
+ AddToWorklist(Sign.getNode());
// Add (N0 < 0) ? abs2 - 1 : 0;
- SDValue SRL =
- DAG.getNode(ISD::SRL, DL, VT, SGN,
- DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
- getShiftAmountTy(SGN.getValueType())));
- SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
- AddToWorklist(SRL.getNode());
- AddToWorklist(ADD.getNode()); // Divide by pow2
- SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
- DAG.getConstant(lg2, DL,
- getShiftAmountTy(ADD.getValueType())));
-
- // If we're dividing by a positive value, we're done. Otherwise, we must
- // negate the result.
- if (N1C->getAPIntValue().isNonNegative())
- return SRA;
-
- AddToWorklist(SRA.getNode());
- return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
+ SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
+ AddToWorklist(Srl.getNode());
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
+ AddToWorklist(Add.getNode());
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
+ AddToWorklist(Sra.getNode());
+
+ // Special case: (sdiv X, 1) -> X
+ // Special Case: (sdiv X, -1) -> 0-X
+ SDValue One = DAG.getConstant(1, DL, VT);
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
+ SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
+ SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
+ SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
+ Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
+
+ // If dividing by a positive value, we're done. Otherwise, the result must
+ // be negated.
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
+
+ // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
+ SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
+ SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
+ return Res;
}
// If integer divide is expensive and we satisfy the requirements, emit an
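
The Sign/Srl/Add/Sra steps above can be exercised in isolation for a positive power-of-two divisor. A minimal scalar model (assumes arithmetic right shift on signed values; the select-based 1/-1 and negation special cases are left out for brevity):

    #include <cassert>
    #include <cstdint>

    // Scalar model of the sdiv-by-(1 << k) expansion, for k in [1, 31).
    static int32_t SDivPow2(int32_t N0, unsigned k) {
      int32_t Sign = N0 >> 31;                   // splat the sign bit
      uint32_t Srl = (uint32_t)Sign >> (32 - k); // (N0 < 0) ? (1<<k)-1 : 0
      int32_t Add = N0 + (int32_t)Srl;           // bias negative dividends
      return Add >> k;                           // arithmetic shift divides
    }

    int main() {
      for (int64_t xi = -100000; xi <= 100000; ++xi)
        for (unsigned k = 1; k < 8; ++k)
          assert(SDivPow2((int32_t)xi, k) == (int32_t)xi / (1 << k));
    }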
@@ -2948,13 +3209,6 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
if (SDValue Op = BuildSDIV(N))
return Op;
- // sdiv, srem -> sdivrem
- // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
- // true. Otherwise, we break the simplification logic in visitREM().
- if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
- if (SDValue DivRem = useDivRem(N))
- return DivRem;
-
return SDValue();
}
@@ -2962,6 +3216,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(VT);
// fold vector ops
if (VT.isVector())
@@ -2977,6 +3232,14 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
N0C, N1C))
return Folded;
+ // fold (udiv X, 1) -> X
+ if (N1C && N1C->isOne())
+ return N0;
+ // fold (udiv X, -1) -> select(X == -1, 1, 0)
+ if (N1C && N1C->getAPIntValue().isAllOnesValue())
+ return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
+ DAG.getConstant(1, DL, VT),
+ DAG.getConstant(0, DL, VT));
if (SDValue V = simplifyDivRem(N, DAG))
return V;
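
Both new udiv special cases follow directly from unsigned semantics: dividing by 1 is the identity, and the only value at least as large as the all-ones divisor is all-ones itself. A quick standalone check (0xFFFFFFFFu standing in for the all-ones divisor):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t vals[] = {0u, 1u, 2u, 0x7FFFFFFFu, 0xFFFFFFFEu, 0xFFFFFFFFu};
      for (uint32_t X : vals) {
        assert(X / 1u == X);                                     // udiv X, 1
        assert(X / 0xFFFFFFFFu == (X == 0xFFFFFFFFu ? 1u : 0u)); // udiv X, -1
      }
    }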
@@ -2984,6 +3247,26 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
+ if (SDValue V = visitUDIVLike(N0, N1, N))
+ return V;
+
+ // udiv, urem -> udivrem
+ // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
+ // true. Otherwise, we break the simplification logic in visitREM().
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
// fold (udiv x, (1 << c)) -> x >>u c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1)) {
@@ -3019,13 +3302,6 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue Op = BuildUDIV(N))
return Op;
- // sdiv, srem -> sdivrem
- // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
- // true. Otherwise, we break the simplification logic in visitREM().
- if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
- if (SDValue DivRem = useDivRem(N))
- return DivRem;
-
return SDValue();
}
@@ -3035,6 +3311,8 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(VT);
+
bool isSigned = (Opcode == ISD::SREM);
SDLoc DL(N);
@@ -3044,6 +3322,10 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (N0C && N1C)
if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
return Folded;
+ // fold (urem X, -1) -> select(X == -1, 0, X)
+ if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
+ return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
+ DAG.getConstant(0, DL, VT), N0);
if (SDValue V = simplifyDivRem(N, DAG))
return V;
@@ -3077,22 +3359,19 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
// If X/C can be simplified by the division-by-constant logic, lower
// X%C to the equivalent of X-X/C*C.
- // To avoid mangling nodes, this simplification requires that the combine()
- // call for the speculative DIV must not cause a DIVREM conversion. We guard
- // against this by skipping the simplification if isIntDivCheap(). When
- // div is not cheap, combine will not return a DIVREM. Regardless,
- // checking cheapness here makes sense since the simplification results in
- // fatter code.
- if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
- unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
- SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
- AddToWorklist(Div.getNode());
- SDValue OptimizedDiv = combine(Div.getNode());
- if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
- assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
- (OptimizedDiv.getOpcode() != ISD::SDIVREM));
+ // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
+ // speculative DIV must not cause a DIVREM conversion. We guard against this
+ // by skipping the simplification if isIntDivCheap(). When div is not cheap,
+ // combine will not return a DIVREM. Regardless, checking cheapness here
+ // makes sense since the simplification results in fatter code.
+ if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
+ SDValue OptimizedDiv =
+ isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
+ if (OptimizedDiv.getNode() && OptimizedDiv.getOpcode() != ISD::UDIVREM &&
+ OptimizedDiv.getOpcode() != ISD::SDIVREM) {
SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
+ AddToWorklist(OptimizedDiv.getNode());
AddToWorklist(Mul.getNode());
return Sub;
}
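
The rewritten visitREM path still depends on the same division identity as before: X % C == X - (X / C) * C whenever X / C is defined. In scalar form (divisors chosen to avoid division by zero and INT_MIN / -1):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t X = -500; X <= 500; ++X)
        for (int32_t C : {-7, -2, 1, 2, 3, 10})
          assert(X % C == X - (X / C) * C); // X%C lowered as X - X/C*C
    }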
@@ -3350,6 +3629,25 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
+ // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
+ // Only do this if the current op isn't legal and the flipped is.
+ unsigned Opcode = N->getOpcode();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isOperationLegal(Opcode, VT) &&
+ (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
+ (N1.isUndef() || DAG.SignBitIsZero(N1))) {
+ unsigned AltOpcode;
+ switch (Opcode) {
+ case ISD::SMIN: AltOpcode = ISD::UMIN; break;
+ case ISD::SMAX: AltOpcode = ISD::UMAX; break;
+ case ISD::UMIN: AltOpcode = ISD::SMIN; break;
+ case ISD::UMAX: AltOpcode = ISD::SMAX; break;
+ default: llvm_unreachable("Unknown MINMAX opcode");
+ }
+ if (TLI.isOperationLegal(AltOpcode, VT))
+ return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
+ }
+
return SDValue();
}
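
When both operands are known non-negative (sign bit zero), the signed and unsigned orderings coincide, so the min/max flavors are interchangeable. A standalone check:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t a = 0; a < 1024; ++a)
        for (int32_t b = 0; b < 1024; b += 37) {
          uint32_t ua = (uint32_t)a, ub = (uint32_t)b;
          assert((uint32_t)std::min(a, b) == std::min(ua, ub)); // smin == umin
          assert((uint32_t)std::max(a, b) == std::max(ua, ub)); // smax == umax
        }
    }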
@@ -3469,9 +3767,9 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
ShOp = SDValue();
}
- // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
- // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C)
- // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
+ // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
+ // (OR (shuf (A, C), shuf (B, C))) -> shuf (OR (A, B), C)
+ // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)
if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
N0->getOperand(0), N1->getOperand(0));
@@ -3490,9 +3788,9 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
ShOp = SDValue();
}
- // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
- // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B))
- // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
+ // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
+ // (OR (shuf (C, A), shuf (C, B))) -> shuf (C, OR (A, B))
+ // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
N0->getOperand(1), N1->getOperand(1));
@@ -3525,7 +3823,7 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
// operations on the left and right operands, so those types must match.
EVT VT = N0.getValueType();
EVT OpVT = LL.getValueType();
- if (LegalOperations || VT != MVT::i1)
+ if (LegalOperations || VT.getScalarType() != MVT::i1)
if (VT != getSetCCResultType(OpVT))
return SDValue();
if (OpVT != RL.getValueType())
@@ -3762,53 +4060,78 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
return true;
}
-bool DAGCombiner::isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
- EVT &ExtVT, unsigned ShAmt) {
- // Don't transform one with multiple uses, this would require adding a new
- // load.
- if (!SDValue(LoadN, 0).hasOneUse())
+bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
+ ISD::LoadExtType ExtType, EVT &MemVT,
+ unsigned ShAmt) {
+ if (!LDST)
return false;
-
- if (LegalOperations &&
- !TLI.isLoadExtLegal(ExtType, LoadN->getValueType(0), ExtVT))
+ // Only allow byte offsets.
+ if (ShAmt % 8)
return false;
// Do not generate loads of non-round integer types since these can
// be expensive (and would be wrong if the type is not byte sized).
- if (!ExtVT.isRound())
+ if (!MemVT.isRound())
return false;
// Don't change the width of a volatile load.
- if (LoadN->isVolatile())
+ if (LDST->isVolatile())
return false;
// Verify that we are actually reducing a load width here.
- if (LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits())
- return false;
-
- // For the transform to be legal, the load must produce only two values
- // (the value loaded and the chain). Don't transform a pre-increment
- // load, for example, which produces an extra value. Otherwise the
- // transformation is not equivalent, and the downstream logic to replace
- // uses gets things wrong.
- if (LoadN->getNumValues() > 2)
+ if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
return false;
- // If the load that we're shrinking is an extload and we're not just
- // discarding the extension we can't simply shrink the load. Bail.
- // TODO: It would be possible to merge the extensions in some cases.
- if (LoadN->getExtensionType() != ISD::NON_EXTLOAD &&
- LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
- return false;
-
- if (!TLI.shouldReduceLoadWidth(LoadN, ExtType, ExtVT))
+ // Ensure that this isn't going to produce an unsupported unaligned access.
+ if (ShAmt &&
+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
+ LDST->getAddressSpace(), ShAmt / 8))
return false;
// It's not possible to generate a constant of extended or untyped type.
- EVT PtrType = LoadN->getOperand(1).getValueType();
+ EVT PtrType = LDST->getBasePtr().getValueType();
if (PtrType == MVT::Untyped || PtrType.isExtended())
return false;
+ if (isa<LoadSDNode>(LDST)) {
+ LoadSDNode *Load = cast<LoadSDNode>(LDST);
+    // Don't transform one with multiple uses; this would require adding a new
+    // load.
+ if (!SDValue(Load, 0).hasOneUse())
+ return false;
+
+ if (LegalOperations &&
+ !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
+ return false;
+
+ // For the transform to be legal, the load must produce only two values
+ // (the value loaded and the chain). Don't transform a pre-increment
+ // load, for example, which produces an extra value. Otherwise the
+ // transformation is not equivalent, and the downstream logic to replace
+ // uses gets things wrong.
+ if (Load->getNumValues() > 2)
+ return false;
+
+ // If the load that we're shrinking is an extload and we're not just
+ // discarding the extension we can't simply shrink the load. Bail.
+ // TODO: It would be possible to merge the extensions in some cases.
+ if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
+ Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
+ return false;
+
+ if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
+ return false;
+ } else {
+    assert(isa<StoreSDNode>(LDST) && "It is neither a Load nor a Store SDNode");
+    StoreSDNode *Store = cast<StoreSDNode>(LDST);
+    // Can't write outside the original store.
+ if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
+ return false;
+
+ if (LegalOperations &&
+ !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
+ return false;
+ }
return true;
}
@@ -3841,16 +4164,22 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
auto *Load = cast<LoadSDNode>(Op);
EVT ExtVT;
if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
- isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {
- // Only add this load if we can make it more narrow.
- if (ExtVT.bitsLT(Load->getMemoryVT()))
+ isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
+
+ // ZEXTLOAD is already small enough.
+ if (Load->getExtensionType() == ISD::ZEXTLOAD &&
+ ExtVT.bitsGE(Load->getMemoryVT()))
+ continue;
+
+      // Use LE (rather than LT) so that equal-sized loads are also converted
+      // to zext loads.
+ if (ExtVT.bitsLE(Load->getMemoryVT()))
Loads.insert(Load);
+
continue;
}
return false;
}
case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
case ISD::AssertZext: {
unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
@@ -3876,7 +4205,23 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
      // Allow one node which will be masked along with any loads found.
if (NodeToMask)
return false;
+
+ // Also ensure that the node to be masked only produces one data result.
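+      // (For instance, a pre/post-indexed load produces an extra pointer
+      // result besides the loaded value and the chain; masking such a node
+      // would be ambiguous, so it is rejected below.)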
NodeToMask = Op.getNode();
+ if (NodeToMask->getNumValues() > 1) {
+ bool HasValue = false;
+ for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
+ MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
+ if (VT != MVT::Glue && VT != MVT::Other) {
+ if (HasValue) {
+ NodeToMask = nullptr;
+ return false;
+ }
+ HasValue = true;
+ }
+ }
+ assert(HasValue && "Node to be masked has no data result?");
+ }
}
return true;
}
@@ -3900,33 +4245,44 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
if (Loads.size() == 0)
return false;
+ LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
SDValue MaskOp = N->getOperand(1);
// If it exists, fixup the single node we allow in the tree that needs
// masking.
if (FixupNode) {
+ LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
FixupNode->getValueType(0),
SDValue(FixupNode, 0), MaskOp);
DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
- DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0),
- MaskOp);
+    if (And.getOpcode() == ISD::AND)
+ DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
}
// Narrow any constants that need it.
for (auto *LogicN : NodesWithConsts) {
- auto *C = cast<ConstantSDNode>(LogicN->getOperand(1));
- SDValue And = DAG.getNode(ISD::AND, SDLoc(C), C->getValueType(0),
- SDValue(C, 0), MaskOp);
- DAG.UpdateNodeOperands(LogicN, LogicN->getOperand(0), And);
+ SDValue Op0 = LogicN->getOperand(0);
+ SDValue Op1 = LogicN->getOperand(1);
+
+ if (isa<ConstantSDNode>(Op0))
+ std::swap(Op0, Op1);
+
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
+ Op1, MaskOp);
+
+ DAG.UpdateNodeOperands(LogicN, Op0, And);
}
// Create narrow loads.
for (auto *Load : Loads) {
+ LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
SDValue(Load, 0), MaskOp);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
- DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp);
+    if (And.getOpcode() == ISD::AND)
+ And = SDValue(
+ DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
SDValue NewLoad = ReduceLoadWidth(And.getNode());
assert(NewLoad &&
"Shouldn't be masking the load if it can't be narrowed");
@@ -3938,6 +4294,60 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
return false;
}
+// Unfold
+// x & (-1 'logical shift' y)
+// To
+// (x 'opposite logical shift' y) 'logical shift' y
+// if it is better for performance.
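+// For example (illustrative only):
+//   x & (-1 << 8)  -->  (x >> 8) << 8   (clears the low 8 bits)
+//   x & (-1 >> 8)  -->  (x << 8) >> 8   (clears the high 8 bits)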
+SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
+ assert(N->getOpcode() == ISD::AND);
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+  // Does the target actually prefer shifts over a mask?
+ if (!TLI.preferShiftsToClearExtremeBits(N0))
+ return SDValue();
+
+ // Try to match (-1 '[outer] logical shift' y)
+ unsigned OuterShift;
+ unsigned InnerShift; // The opposite direction to the OuterShift.
+ SDValue Y; // Shift amount.
+ auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
+ if (!M.hasOneUse())
+ return false;
+ OuterShift = M->getOpcode();
+ if (OuterShift == ISD::SHL)
+ InnerShift = ISD::SRL;
+ else if (OuterShift == ISD::SRL)
+ InnerShift = ISD::SHL;
+ else
+ return false;
+ if (!isAllOnesConstant(M->getOperand(0)))
+ return false;
+ Y = M->getOperand(1);
+ return true;
+ };
+
+ SDValue X;
+ if (matchMask(N1))
+ X = N0;
+ else if (matchMask(N0))
+ X = N1;
+ else
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ // tmp = x 'opposite logical shift' y
+ SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
+ // ret = tmp 'logical shift' y
+ SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
+
+ return T1;
+}
+
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4004,7 +4414,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
};
if (N0.getOpcode() == ISD::OR &&
- matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
+ ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
return N1;
// fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
@@ -4235,6 +4645,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return BSwap;
}
+ if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
+ return Shifts;
+
return SDValue();
}
@@ -4261,7 +4674,10 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
if (!N0.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (!N01C || N01C->getZExtValue() != 0xFF00)
+    // Also handle 0xFFFF since the LHS is guaranteed to have zeros there.
+ // This is needed for X86.
+ if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
+ N01C->getZExtValue() != 0xFFFF))
return SDValue();
N0 = N0.getOperand(0);
LookPassAnd0 = true;
@@ -4308,7 +4724,10 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
if (!N10.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
- if (!N101C || N101C->getZExtValue() != 0xFF00)
+ // Also allow 0xFFFF since the bits will be shifted out. This is needed
+ // for X86.
+ if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
+ N101C->getZExtValue() != 0xFFFF))
return SDValue();
N10 = N10.getOperand(0);
LookPassAnd1 = true;
@@ -4379,6 +4798,14 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
return false;
case 0xFF: MaskByteOffset = 0; break;
case 0xFF00: MaskByteOffset = 1; break;
+ case 0xFFFF:
+ // In case demanded bits didn't clear the bits that will be shifted out.
+ // This is needed for X86.
+ if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
+ MaskByteOffset = 1;
+ break;
+ }
+ return false;
case 0xFF0000: MaskByteOffset = 2; break;
case 0xFF000000: MaskByteOffset = 3; break;
}
@@ -4693,7 +5120,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return LHS->getAPIntValue().intersects(RHS->getAPIntValue());
};
if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
- matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
+ ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
if (SDValue COR = DAG.FoldConstantArithmetic(
ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
@@ -4749,7 +5176,8 @@ bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg. The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
-static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
+static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
+ SelectionDAG &DAG) {
// If EltSize is a power of 2 then:
//
// (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
@@ -4784,9 +5212,13 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
unsigned MaskLoBits = 0;
if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
- if (NegC->getAPIntValue() == EltSize - 1) {
+ KnownBits Known;
+ DAG.computeKnownBits(Neg.getOperand(0), Known);
+ unsigned Bits = Log2_64(EltSize);
+ if (NegC->getAPIntValue().getActiveBits() <= Bits &&
+ ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
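+        // The mask need not be exactly EltSize - 1: any low bit it clears
+        // must already be known zero in the operand, so the AND still
+        // reduces the shift amount modulo EltSize.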
Neg = Neg.getOperand(0);
- MaskLoBits = Log2_64(EltSize);
+ MaskLoBits = Bits;
}
}
}
@@ -4801,10 +5233,16 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
// On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
// Pos'. The truncation is redundant for the purpose of the equality.
- if (MaskLoBits && Pos.getOpcode() == ISD::AND)
- if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
- if (PosC->getAPIntValue() == EltSize - 1)
+ if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
+ if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
+ KnownBits Known;
+ DAG.computeKnownBits(Pos.getOperand(0), Known);
+ if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
+ ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
+ MaskLoBits))
Pos = Pos.getOperand(0);
+ }
+ }
// The condition we need is now:
//
@@ -4860,7 +5298,7 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
// (srl x, (*ext y))) ->
// (rotr x, y) or (rotl x, (sub 32, y))
EVT VT = Shifted.getValueType();
- if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
+ if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
HasPos ? Pos : Neg).getNode();
@@ -4878,8 +5316,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
if (!TLI.isTypeLegal(VT)) return nullptr;
// The target must have at least one rotate flavor.
- bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
- bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
+ bool HasROTL = hasOperation(ISD::ROTL, VT);
+ bool HasROTR = hasOperation(ISD::ROTR, VT);
if (!HasROTL && !HasROTR) return nullptr;
// Check for truncated rotate.
@@ -4928,7 +5366,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
ConstantSDNode *RHS) {
return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
};
- if (matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
+ if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
@@ -5185,7 +5623,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
Optional<BaseIndexOffset> Base;
SDValue Chain;
- SmallSet<LoadSDNode *, 8> Loads;
+ SmallPtrSet<LoadSDNode *, 8> Loads;
Optional<ByteProvider> FirstByteProvider;
int64_t FirstOffset = INT64_MAX;
@@ -5210,7 +5648,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();
// Loads must share the same base address
- BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
+ BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
int64_t ByteOffsetFromBase = 0;
if (!Base)
Base = Ptr;
@@ -5284,6 +5722,88 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
}
+// If the target has andn, bsl, or a similar bit-select instruction,
+// we want to unfold the masked merge, whose canonical pattern is:
+//   ((x ^ y) & m) ^ y
+// into:
+//   (x & m) | (y & ~m)
+// If y is a constant and the 'andn' does not work with immediates,
+// we instead unfold into a different pattern:
+//   ~(~x & m) & (m | y)
+// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
+// the very least that breaks andnpd / andnps patterns, and because those
+// patterns are simplified in IR and shouldn't be created in the DAG.
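+// A worked example with 8-bit values (illustrative only):
+//   x = 0b10101010, y = 0b01010101, m = 0b11110000
+//   ((x ^ y) & m) ^ y == 0b10100101 == (x & m) | (y & ~m)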
+SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
+ assert(N->getOpcode() == ISD::XOR);
+
+ // Don't touch 'not' (i.e. where y = -1).
+ if (isAllOnesConstantOrAllOnesSplatConstant(N->getOperand(1)))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ // There are 3 commutable operators in the pattern,
+ // so we have to deal with 8 possible variants of the basic pattern.
+ SDValue X, Y, M;
+ auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
+ if (And.getOpcode() != ISD::AND || !And.hasOneUse())
+ return false;
+ SDValue Xor = And.getOperand(XorIdx);
+ if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
+ return false;
+ SDValue Xor0 = Xor.getOperand(0);
+ SDValue Xor1 = Xor.getOperand(1);
+ // Don't touch 'not' (i.e. where y = -1).
+ if (isAllOnesConstantOrAllOnesSplatConstant(Xor1))
+ return false;
+ if (Other == Xor0)
+ std::swap(Xor0, Xor1);
+ if (Other != Xor1)
+ return false;
+ X = Xor0;
+ Y = Xor1;
+ M = And.getOperand(XorIdx ? 0 : 1);
+ return true;
+ };
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
+ !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
+ return SDValue();
+
+  // Don't do anything if the mask is constant. This should not be reachable:
+  // InstCombine should have already unfolded this pattern, and DAGCombiner
+  // probably shouldn't produce it either.
+ if (isa<ConstantSDNode>(M.getNode()))
+ return SDValue();
+
+ // We can transform if the target has AndNot
+ if (!TLI.hasAndNot(M))
+ return SDValue();
+
+ SDLoc DL(N);
+
+ // If Y is a constant, check that 'andn' works with immediates.
+ if (!TLI.hasAndNot(Y)) {
+ assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
+ // If not, we need to do a bit more work to make sure andn is still used.
+ SDValue NotX = DAG.getNOT(DL, X, VT);
+ SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
+ SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
+ SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
+ return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
+ }
+
+ SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
+ SDValue NotM = DAG.getNOT(DL, M, VT);
+ SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
+
+ return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
+}
+
SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5363,7 +5883,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
- if (isOneConstant(N1) && VT == MVT::i1 &&
+ if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
(N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
@@ -5375,7 +5895,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
- if (isAllOnesConstant(N1) &&
+ if (isAllOnesConstant(N1) && N0.hasOneUse() &&
(N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
@@ -5396,13 +5916,19 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
- unsigned OpSizeInBits = VT.getScalarSizeInBits();
- if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
- N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
- TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
- if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
- if (C->getAPIntValue() == (OpSizeInBits - 1))
- return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
+ if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+ SDValue A = N0.getOpcode() == ISD::ADD ? N0 : N1;
+ SDValue S = N0.getOpcode() == ISD::SRA ? N0 : N1;
+ if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
+ SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
+ SDValue S0 = S.getOperand(0);
+ if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
+ if (C->getAPIntValue() == (OpSizeInBits - 1))
+ return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
+ }
+ }
}
// fold (xor x, x) -> 0
@@ -5439,6 +5965,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
return Tmp;
+ // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
+ if (SDValue MM = unfoldMaskedMerge(N))
+ return MM;
+
// Simplify the expression using non-local knowledge.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -5641,7 +6171,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
return Val->getAPIntValue().uge(OpSizeInBits);
};
- if (matchUnaryPredicate(N1, MatchShiftTooBig))
+ if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
return DAG.getUNDEF(VT);
// fold (shl x, 0) -> x
if (N1C && N1C->isNullValue())
@@ -5676,7 +6206,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return (c1 + c2).uge(OpSizeInBits);
};
- if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
return DAG.getConstant(0, SDLoc(N), VT);
auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
@@ -5686,7 +6216,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return (c1 + c2).ult(OpSizeInBits);
};
- if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
SDLoc DL(N);
EVT ShiftVT = N1.getValueType();
SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
@@ -5862,7 +6392,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
return Val->getAPIntValue().uge(OpSizeInBits);
};
- if (matchUnaryPredicate(N1, MatchShiftTooBig))
+ if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
return DAG.getUNDEF(VT);
// fold (sra x, 0) -> x
if (N1C && N1C->isNullValue())
@@ -5897,7 +6427,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return (c1 + c2).uge(OpSizeInBits);
};
- if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT));
@@ -5908,7 +6438,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return (c1 + c2).ult(OpSizeInBits);
};
- if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);
}
@@ -6026,7 +6556,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
return Val->getAPIntValue().uge(OpSizeInBits);
};
- if (matchUnaryPredicate(N1, MatchShiftTooBig))
+ if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
return DAG.getUNDEF(VT);
// fold (srl x, 0) -> x
if (N1C && N1C->isNullValue())
@@ -6049,7 +6579,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return (c1 + c2).uge(OpSizeInBits);
};
- if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
return DAG.getConstant(0, SDLoc(N), VT);
auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
@@ -6059,7 +6589,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
return (c1 + c2).ult(OpSizeInBits);
};
- if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
SDLoc DL(N);
EVT ShiftVT = N1.getValueType();
SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
@@ -6270,6 +6800,13 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) {
// fold (ctlz c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
+
+ // If the value is known never to be zero, switch to the undef version.
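+  // For example, (ctlz (or x, 1)) can never observe a zero input, so the
+  // zero-undef form is safe.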
+ if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
+ if (DAG.isKnownNeverZero(N0))
+ return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
+ }
+
return SDValue();
}
@@ -6290,6 +6827,13 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) {
// fold (cttz c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
+
+ // If the value is known never to be zero, switch to the undef version.
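+  // For example, (cttz (or x, 1)) can never observe a zero input (and would
+  // trivially fold to 0), so the zero-undef form is safe.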
+ if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
+ if (DAG.isKnownNeverZero(N0))
+ return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
+ }
+
return SDValue();
}
@@ -6313,7 +6857,7 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
return SDValue();
}
-/// \brief Generate Min/Max node
+/// Generate Min/Max node
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
SDValue RHS, SDValue True, SDValue False,
ISD::CondCode CC, const TargetLowering &TLI,
@@ -6428,9 +6972,9 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
// in another basic block or it could require searching a complicated
// expression.
if (CondVT.isInteger() &&
- TLI.getBooleanContents(false, true) ==
+ TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
TargetLowering::ZeroOrOneBooleanContent &&
- TLI.getBooleanContents(false, false) ==
+ TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
TargetLowering::ZeroOrOneBooleanContent &&
C1->isNullValue() && C2->isOne()) {
SDValue NotCond =
@@ -6559,15 +7103,10 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
}
}
- // select (xor Cond, 1), X, Y -> select Cond, Y, X
if (VT0 == MVT::i1) {
- if (N0->getOpcode() == ISD::XOR) {
- if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
- SDValue Cond0 = N0->getOperand(0);
- if (C->isOne())
- return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N2, N1);
- }
- }
+ // select (not Cond), N1, N2 -> select Cond, N2, N1
+ if (isBitwiseNot(N0))
+ return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
}
// fold selects based on a setcc into other things, such as min/max/abs
@@ -6711,6 +7250,7 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
SDValue DataLo, DataHi;
std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+ SDValue Scale = MSC->getScale();
SDValue BasePtr = MSC->getBasePtr();
SDValue IndexLo, IndexHi;
std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
@@ -6720,11 +7260,11 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, MSC->getAAInfo(), MSC->getRanges());
- SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
+ SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
DL, OpsLo, MMO);
- SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
+ SDValue OpsHi[] = { Chain, DataHi, MaskHi, BasePtr, IndexHi, Scale };
Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
DL, OpsHi, MMO);
@@ -6785,12 +7325,12 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
MST->isCompressingStore());
+ unsigned HiOffset = LoMemVT.getStoreSize();
- MMO = DAG.getMachineFunction().
- getMachineMemOperand(MST->getPointerInfo(),
- MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
- SecondHalfAlignment, MST->getAAInfo(),
- MST->getRanges());
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MST->getPointerInfo().getWithOffset(HiOffset),
+ MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
+ MST->getAAInfo(), MST->getRanges());
Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
MST->isTruncatingStore(),
@@ -6844,6 +7384,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+ SDValue Scale = MGT->getScale();
SDValue BasePtr = MGT->getBasePtr();
SDValue Index = MGT->getIndex();
SDValue IndexLo, IndexHi;
@@ -6854,13 +7395,13 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
- SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
+ SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo, Scale };
Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
- MMO);
+ MMO);
- SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
+ SDValue OpsHi[] = { Chain, Src0Hi, MaskHi, BasePtr, IndexHi, Scale };
Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
- MMO);
+ MMO);
AddToWorklist(Lo.getNode());
AddToWorklist(Hi.getNode());
@@ -6934,11 +7475,12 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
MLD->isExpandingLoad());
+ unsigned HiOffset = LoMemVT.getStoreSize();
- MMO = DAG.getMachineFunction().
- getMachineMemOperand(MLD->getPointerInfo(),
- MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
- SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MLD->getPointerInfo().getWithOffset(HiOffset),
+ MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
+ MLD->getAAInfo(), MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
ISD::NON_EXTLOAD, MLD->isExpandingLoad());
@@ -7056,6 +7598,36 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
}
+
+ // If this select has a condition (setcc) with narrower operands than the
+ // select, try to widen the compare to match the select width.
+ // TODO: This should be extended to handle any constant.
+ // TODO: This could be extended to handle non-loading patterns, but that
+ // requires thorough testing to avoid regressions.
+ if (isNullConstantOrNullSplatConstant(RHS)) {
+ EVT NarrowVT = LHS.getValueType();
+ EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
+ EVT SetCCVT = getSetCCResultType(LHS.getValueType());
+ unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
+ unsigned WideWidth = WideVT.getScalarSizeInBits();
+ bool IsSigned = isSignedIntSetCC(CC);
+ auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
+ SetCCWidth != 1 && SetCCWidth < WideWidth &&
+ TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
+ TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
+ // Both compare operands can be widened for free. The LHS can use an
+ // extended load, and the RHS is a constant:
+ // vselect (ext (setcc load(X), C)), N1, N2 -->
+ // vselect (setcc extload(X), C'), N1, N2
+ auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
+ SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
+ EVT WideSetCCVT = getSetCCResultType(WideVT);
+ SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
+ return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
+ }
+ }
}
if (SimplifySelectOps(N, N1, N2))
@@ -7127,22 +7699,33 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
}
SDValue DAGCombiner::visitSETCC(SDNode *N) {
- return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
- cast<CondCodeSDNode>(N->getOperand(2))->get(),
- SDLoc(N));
-}
+  // setcc is very commonly used as an argument to brcond. This pattern
+  // also lends itself to numerous combines and, as a result, it is desirable
+  // to keep the argument to a brcond as a setcc for as long as possible.
+ bool PreferSetCC =
+ N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
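+  // e.g. a (setcc ...) feeding a brcond is kept as a setcc where possible so
+  // that brcond-related combines can still fire on it.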
-SDValue DAGCombiner::visitSETCCE(SDNode *N) {
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- SDValue Carry = N->getOperand(2);
- SDValue Cond = N->getOperand(3);
+ SDValue Combined = SimplifySetCC(
+ N->getValueType(0), N->getOperand(0), N->getOperand(1),
+ cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
- // If Carry is false, fold to a regular SETCC.
- if (Carry.getOpcode() == ISD::CARRY_FALSE)
- return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
+ if (!Combined)
+ return SDValue();
- return SDValue();
+  // If we prefer a setcc but the combined result is not one, try our best
+  // to recreate one using rebuildSetCC.
+ if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
+ SDValue NewSetCC = rebuildSetCC(Combined);
+
+ // We don't have anything interesting to combine to.
+ if (NewSetCC.getNode() == N)
+ return SDValue();
+
+ if (NewSetCC)
+ return NewSetCC;
+ }
+
+ return Combined;
}
SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
@@ -7222,12 +7805,12 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if extension are possible and the above
// mentioned transformation is profitable.
-static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
+static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
unsigned ExtOpc,
SmallVectorImpl<SDNode *> &ExtendNodes,
const TargetLowering &TLI) {
bool HasCopyToRegUses = false;
- bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
+ bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
UE = N0.getNode()->use_end();
UI != UE; ++UI) {
@@ -7283,16 +7866,16 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
}
void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
- SDValue Trunc, SDValue ExtLoad,
- const SDLoc &DL, ISD::NodeType ExtType) {
+ SDValue OrigLoad, SDValue ExtLoad,
+ ISD::NodeType ExtType) {
// Extend SetCC uses if necessary.
- for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
- SDNode *SetCC = SetCCs[i];
+ SDLoc DL(ExtLoad);
+ for (SDNode *SetCC : SetCCs) {
SmallVector<SDValue, 4> Ops;
for (unsigned j = 0; j != 2; ++j) {
SDValue SOp = SetCC->getOperand(j);
- if (SOp == Trunc)
+ if (SOp == OrigLoad)
Ops.push_back(ExtLoad);
else
Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
@@ -7341,7 +7924,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
return SDValue();
SmallVector<SDNode *, 4> SetCCs;
- if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
+ if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
return SDValue();
ISD::LoadExtType ExtType =
@@ -7372,7 +7955,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
SDValue SplitLoad = DAG.getExtLoad(
- ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
+ ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
@@ -7395,12 +7978,82 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
// with a truncate of the concatenated sextloaded vectors.
SDValue Trunc =
DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
+ ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
CombineTo(N0.getNode(), Trunc, NewChain);
- ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
- (ISD::NodeType)N->getOpcode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
+// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
+// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
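+// For example (illustrative types):
+//   (zext i32 (and (srl (load i16 p), 4), 0xFF))
+//     --> (and (srl (zextload i16 p to i32), 4), 0xFF)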
+SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
+ assert(N->getOpcode() == ISD::ZERO_EXTEND);
+ EVT VT = N->getValueType(0);
+
+ // and/or/xor
+ SDValue N0 = N->getOperand(0);
+ if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) ||
+ N0.getOperand(1).getOpcode() != ISD::Constant ||
+ (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
+ return SDValue();
+
+ // shl/shr
+ SDValue N1 = N0->getOperand(0);
+ if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
+ N1.getOperand(1).getOpcode() != ISD::Constant ||
+ (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
+ return SDValue();
+
+ // load
+ if (!isa<LoadSDNode>(N1.getOperand(0)))
+ return SDValue();
+ LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
+ EVT MemVT = Load->getMemoryVT();
+ if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
+ Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
+ return SDValue();
+
+  // If the shift op is SHL, the logic op must be AND; otherwise the result
+  // will be wrong.
+ if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
+ return SDValue();
+
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
+ ISD::ZERO_EXTEND, SetCCs, TLI))
+ return SDValue();
+
+ // Actually do the transformation.
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
+ Load->getChain(), Load->getBasePtr(),
+ Load->getMemoryVT(), Load->getMemOperand());
+
+ SDLoc DL1(N1);
+ SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
+ N1.getOperand(1));
+
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ SDLoc DL0(N0);
+ SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
+ DAG.getConstant(Mask, DL0, VT));
+
+ ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
+ CombineTo(N, And);
+ if (SDValue(Load, 0).hasOneUse()) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
+ } else {
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
+ Load->getValueType(0), ExtLoad);
+ CombineTo(Load, Trunc, ExtLoad.getValue(1));
+ }
+  return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
/// If we're narrowing or widening the result of a vector select and the final
/// size is the same size as a setcc (compare) feeding the select, then try to
/// apply the cast operation to the select's operands because matching vector
@@ -7446,6 +8099,106 @@ SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
}
+// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
+// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
+static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
+ const TargetLowering &TLI, EVT VT,
+ bool LegalOperations, SDNode *N,
+ SDValue N0, ISD::LoadExtType ExtLoadType) {
+ SDNode *N0Node = N0.getNode();
+ bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
+ : ISD::isZEXTLoad(N0Node);
+ if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
+ !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
+ return {};
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ if ((LegalOperations || LN0->isVolatile()) &&
+ !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
+ return {};
+
+ SDValue ExtLoad =
+ DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
+ LN0->getBasePtr(), MemVT, LN0->getMemOperand());
+ Combiner.CombineTo(N, ExtLoad);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
+// Only generate vector extloads when 1) they're legal, and 2) they are
+// deemed desirable by the target.
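+// For example (illustrative): (sext i32 (load i8 p)) becomes a single
+// (sextload i8 p to i32) when that extending load is legal and desirable.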
+static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
+ const TargetLowering &TLI, EVT VT,
+ bool LegalOperations, SDNode *N, SDValue N0,
+ ISD::LoadExtType ExtLoadType,
+ ISD::NodeType ExtOpc) {
+ if (!ISD::isNON_EXTLoad(N0.getNode()) ||
+ !ISD::isUNINDEXEDLoad(N0.getNode()) ||
+ ((LegalOperations || VT.isVector() ||
+ cast<LoadSDNode>(N0)->isVolatile()) &&
+ !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
+ return {};
+
+ bool DoXform = true;
+ SmallVector<SDNode *, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
+ if (VT.isVector())
+ DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
+ if (!DoXform)
+ return {};
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
+ LN0->getBasePtr(), N0.getValueType(),
+ LN0->getMemOperand());
+ Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
+ // If the load value is used only by N, replace it via CombineTo N.
+ bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
+ Combiner.CombineTo(N, ExtLoad);
+ if (NoReplaceTrunc) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ } else {
+ SDValue Trunc =
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
+ Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
+ bool LegalOperations) {
+ assert((N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
+
+ SDValue SetCC = N->getOperand(0);
+ if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
+ !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
+ return SDValue();
+
+ SDValue X = SetCC.getOperand(0);
+ SDValue Ones = SetCC.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
+ EVT VT = N->getValueType(0);
+ EVT XVT = X.getValueType();
+ // setge X, C is canonicalized to setgt, so we do not need to match that
+ // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
+ // not require the 'not' op.
+ if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
+ // Invert and smear/shift the sign bit:
+ // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
+ // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
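+    // e.g. for i8: if X >= 0 its sign bit is clear, so (not X) has the sign
+    // bit set and shifting it right by 7 yields all-ones (sra) or 1 (srl);
+    // if X < 0 the shift yields 0.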
+ SDLoc DL(N);
+ SDValue NotX = DAG.getNOT(DL, X, VT);
+ SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
+ auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
+ return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -7510,62 +8263,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
}
}
- // fold (sext (load x)) -> (sext (truncate (sextload x)))
- // Only generate vector extloads when 1) they're legal, and 2) they are
- // deemed desirable by the target.
- if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
- ((!LegalOperations && !VT.isVector() &&
- !cast<LoadSDNode>(N0)->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
- bool DoXform = true;
- SmallVector<SDNode*, 4> SetCCs;
- if (!N0.hasOneUse())
- DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
- if (VT.isVector())
- DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
- if (DoXform) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
- LN0->getBasePtr(), N0.getValueType(),
- LN0->getMemOperand());
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
- N0.getValueType(), ExtLoad);
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
- // If the load value is used only by N, replace it via CombineTo N.
- bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
- CombineTo(N, ExtLoad);
- if (NoReplaceTrunc)
- DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
- else
- CombineTo(LN0, Trunc, ExtLoad.getValue(1));
- return SDValue(N, 0);
- }
- }
+ // Try to simplify (sext (load x)).
+ if (SDValue foldedExt =
+ tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
+ ISD::SEXTLOAD, ISD::SIGN_EXTEND))
+ return foldedExt;
// fold (sext (load x)) to multiple smaller sextloads.
// Only on illegal but splittable vectors.
if (SDValue ExtLoad = CombineExtLoad(N))
return ExtLoad;
- // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
- // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
- if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
- ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- EVT MemVT = LN0->getMemoryVT();
- if ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
- LN0->getBasePtr(), MemVT,
- LN0->getMemOperand());
- CombineTo(N, ExtLoad);
- CombineTo(N0.getNode(),
- DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
- N0.getValueType(), ExtLoad),
- ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
+ // Try to simplify (sext (sextload x)).
+ if (SDValue foldedExt = tryToFoldExtOfExtload(
+ DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
+ return foldedExt;
// fold (sext (and/or/xor (load x), cst)) ->
// (and/or/xor (sextload x), (sext cst))
@@ -7573,30 +8285,26 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
N0.getOpcode() == ISD::XOR) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
- TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
- if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
- bool DoXform = true;
+ LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
+ EVT MemVT = LN00->getMemoryVT();
+ if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
+ LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
SmallVector<SDNode*, 4> SetCCs;
- if (!N0.hasOneUse())
- DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
- SetCCs, TLI);
+ bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
+ ISD::SIGN_EXTEND, SetCCs, TLI);
if (DoXform) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
- LN0->getChain(), LN0->getBasePtr(),
- LN0->getMemoryVT(),
- LN0->getMemOperand());
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
+ LN00->getChain(), LN00->getBasePtr(),
+ LN00->getMemoryVT(),
+ LN00->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.sext(VT.getSizeInBits());
SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
ExtLoad, DAG.getConstant(Mask, DL, VT));
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
- SDLoc(N0.getOperand(0)),
- N0.getOperand(0).getValueType(), ExtLoad);
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
+ ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
bool NoReplaceTruncAnd = !N0.hasOneUse();
- bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
+ bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
CombineTo(N, And);
// If N0 has multiple uses, change other uses as well.
if (NoReplaceTruncAnd) {
@@ -7604,15 +8312,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
CombineTo(N0.getNode(), TruncAnd);
}
- if (NoReplaceTrunc)
- DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
- else
- CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+ if (NoReplaceTrunc) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
+ } else {
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
+ LN00->getValueType(0), ExtLoad);
+ CombineTo(LN00, Trunc, ExtLoad.getValue(1));
+ }
return SDValue(N,0); // Return N so it doesn't get rechecked!
}
}
}
+ if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
+ return V;
+
if (N0.getOpcode() == ISD::SETCC) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
@@ -7659,8 +8373,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// If the type of the setcc is larger (say, i8) then the value of the high
// bit depends on getBooleanContents(), so ask TLI for a real "true" value
// of the appropriate width.
- SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT)
- : TLI.getConstTrueVal(DAG, VT, DL);
+ SDValue ExtTrueVal = (SetCCWidth == 1)
+ ? DAG.getAllOnesConstant(DL, VT)
+ : DAG.getBoolConstant(true, DL, VT, N00VT);
SDValue Zero = DAG.getConstant(0, DL, VT);
if (SDValue SCC =
SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
@@ -7777,13 +8492,16 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// Try to mask before the extension to avoid having to generate a larger mask,
// possibly over several sub-vectors.
- if (SrcVT.bitsLT(VT)) {
+ if (SrcVT.bitsLT(VT) && VT.isVector()) {
if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
SDValue Op = N0.getOperand(0);
Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
AddToWorklist(Op.getNode());
- return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
+ SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
+ // Transfer the debug info; the new node is equivalent to N0.
+ DAG.transferDbgValues(N0, ZExtOrTrunc);
+ return ZExtOrTrunc;
}
}
@@ -7815,39 +8533,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
X, DAG.getConstant(Mask, DL, VT));
}
- // fold (zext (load x)) -> (zext (truncate (zextload x)))
- // Only generate vector extloads when 1) they're legal, and 2) they are
- // deemed desirable by the target.
- if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
- ((!LegalOperations && !VT.isVector() &&
- !cast<LoadSDNode>(N0)->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
- bool DoXform = true;
- SmallVector<SDNode*, 4> SetCCs;
- if (!N0.hasOneUse())
- DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
- if (VT.isVector())
- DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
- if (DoXform) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
- LN0->getChain(),
- LN0->getBasePtr(), N0.getValueType(),
- LN0->getMemOperand());
-
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
- N0.getValueType(), ExtLoad);
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND);
- // If the load value is used only by N, replace it via CombineTo N.
- bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
- CombineTo(N, ExtLoad);
- if (NoReplaceTrunc)
- DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
- else
- CombineTo(LN0, Trunc, ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
+ // Try to simplify (zext (load x)).
+ if (SDValue foldedExt =
+ tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
+ ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
+ return foldedExt;
// fold (zext (load x)) to multiple smaller zextloads.
// Only on illegal but splittable vectors.
@@ -7862,10 +8552,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
N0.getOpcode() == ISD::XOR) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
- if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
+ LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
+ EVT MemVT = LN00->getMemoryVT();
+ if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
+ LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse()) {
@@ -7873,29 +8564,26 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
EVT LoadResultTy = AndC->getValueType(0);
EVT ExtVT;
- if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT))
+ if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
DoXform = false;
}
- if (DoXform)
- DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
- ISD::ZERO_EXTEND, SetCCs, TLI);
}
+ if (DoXform)
+ DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
+ ISD::ZERO_EXTEND, SetCCs, TLI);
if (DoXform) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
- LN0->getChain(), LN0->getBasePtr(),
- LN0->getMemoryVT(),
- LN0->getMemOperand());
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
+ LN00->getChain(), LN00->getBasePtr(),
+ LN00->getMemoryVT(),
+ LN00->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
SDLoc DL(N);
SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
ExtLoad, DAG.getConstant(Mask, DL, VT));
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
- SDLoc(N0.getOperand(0)),
- N0.getOperand(0).getValueType(), ExtLoad);
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND);
+ ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
bool NoReplaceTruncAnd = !N0.hasOneUse();
- bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
+ bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
CombineTo(N, And);
// If N0 has multiple uses, change other uses as well.
if (NoReplaceTruncAnd) {
@@ -7903,35 +8591,30 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
CombineTo(N0.getNode(), TruncAnd);
}
- if (NoReplaceTrunc)
- DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
- else
- CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+ if (NoReplaceTrunc) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
+ } else {
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
+ LN00->getValueType(0), ExtLoad);
+ CombineTo(LN00, Trunc, ExtLoad.getValue(1));
+ }
return SDValue(N,0); // Return N so it doesn't get rechecked!
}
}
}
- // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
- // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
- if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
- ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- EVT MemVT = LN0->getMemoryVT();
- if ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
- LN0->getChain(),
- LN0->getBasePtr(), MemVT,
- LN0->getMemOperand());
- CombineTo(N, ExtLoad);
- CombineTo(N0.getNode(),
- DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
- ExtLoad),
- ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
+ // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
+ // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
+ if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
+ return ZExtLoad;
+
+ // Try to simplify (zext (zextload x)).
+ if (SDValue foldedExt = tryToFoldExtOfExtload(
+ DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
+ return foldedExt;
+
+ if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
+ return V;
if (N0.getOpcode() == ISD::SETCC) {
// Only do this before legalize for now.
@@ -8069,24 +8752,25 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
- DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
+ DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
+ TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
- N0.getValueType(), ExtLoad);
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
- ISD::ANY_EXTEND);
+ ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
// If the load value is used only by N, replace it via CombineTo N.
bool NoReplaceTrunc = N0.hasOneUse();
CombineTo(N, ExtLoad);
- if (NoReplaceTrunc)
+ if (NoReplaceTrunc) {
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
- else
+ } else {
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
+ N0.getValueType(), ExtLoad);
CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+ }
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -8094,9 +8778,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// fold (aext (zextload x)) -> (aext (truncate (zextload x)))
// fold (aext (sextload x)) -> (aext (truncate (sextload x)))
// fold (aext ( extload x)) -> (aext (truncate (extload x)))
- if (N0.getOpcode() == ISD::LOAD &&
- !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
- N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
ISD::LoadExtType ExtType = LN0->getExtensionType();
EVT MemVT = LN0->getMemoryVT();
@@ -8105,10 +8788,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
VT, LN0->getChain(), LN0->getBasePtr(),
MemVT, LN0->getMemOperand());
CombineTo(N, ExtLoad);
- CombineTo(N0.getNode(),
- DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
- N0.getValueType(), ExtLoad),
- ExtLoad.getValue(1));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -8248,8 +8928,9 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
unsigned ShAmt = 0;
if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
- if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- ShAmt = N01->getZExtValue();
+ SDValue SRL = N0;
+ if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
+ ShAmt = ConstShift->getZExtValue();
unsigned EVTBits = ExtVT.getSizeInBits();
// Is the shift amount a multiple of size of VT?
if ((ShAmt & (EVTBits-1)) == 0) {
@@ -8262,17 +8943,36 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
// At this point, we must have a load or else we can't do the transform.
if (!isa<LoadSDNode>(N0)) return SDValue();
+ auto *LN0 = cast<LoadSDNode>(N0);
+
// Because a SRL must be assumed to *need* to zero-extend the high bits
// (as opposed to anyext the high bits), we can't combine the zextload
// lowering of SRL and an sextload.
- if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
+ if (LN0->getExtensionType() == ISD::SEXTLOAD)
return SDValue();
// If the shift amount is larger than the input type then we're not
// accessing any of the loaded bytes. If the load was a zextload/extload
// then the result of the shift+trunc is zero/undef (handled elsewhere).
- if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
+ if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
return SDValue();
+
+ // If the SRL is only used by a masking AND, we may be able to adjust
+ // the ExtVT to make the AND redundant.
+ SDNode *Mask = *(SRL->use_begin());
+ if (Mask->getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(Mask->getOperand(1))) {
+ const APInt &ShiftMask =
+ cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
+ if (ShiftMask.isMask()) {
+ EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
+ ShiftMask.countTrailingOnes());
+ // If the mask is smaller, recompute the type.
+ if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
+ TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
+ ExtVT = MaskedVT;
+ }
+ }
}
}
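
A minimal sketch of the ExtVT-narrowing decision above, restated with plain integers in place of APInt/EVT (helper name invented; the real code additionally requires TLI.isLoadExtLegal for the narrowed type):

#include <bit>
#include <cstdint>

// Given the AND mask applied to (srl (load x), ShAmt), a contiguous
// low-bit mask lets the load itself be narrowed so the AND is redundant.
int narrowedLoadBits(uint64_t ShiftMask, int ExtBits) {
  // A "mask" is 0..01..1: adding one clears it to a power of two.
  bool IsLowBitMask = ShiftMask != 0 && ((ShiftMask + 1) & ShiftMask) == 0;
  if (!IsLowBitMask)
    return ExtBits;                            // keep the original ExtVT
  int MaskedBits = std::countr_one(ShiftMask); // trailing ones = mask width
  return MaskedBits < ExtBits ? MaskedBits : ExtBits;
}
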
@@ -8292,7 +8992,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
return SDValue();
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- if (!isLegalNarrowLoad(LN0, ExtType, ExtVT, ShAmt))
+ if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
return SDValue();
// For big endian targets, we need to adjust the offset to the pointer to
@@ -8388,7 +9088,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
}
- // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x)
+ // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
@@ -8762,6 +9462,22 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
}
+ // fold (truncate (extract_subvector(ext x))) ->
+ // (extract_subvector x)
+ // TODO: This can be generalized to cover cases where the truncate and extract
+ // do not fully cancel each other out.
+ if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::SIGN_EXTEND ||
+ N00.getOpcode() == ISD::ZERO_EXTEND ||
+ N00.getOpcode() == ISD::ANY_EXTEND) {
+ if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
+ VT.getVectorElementType())
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
+ N00.getOperand(0), N0.getOperand(1));
+ }
+ }
+
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
@@ -8882,17 +9598,17 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
}
// If the input is a constant, let getNode fold it.
- if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
- // If we can't allow illegal operations, we need to check that this is just
- // a fp -> int or int -> conversion and that the resulting operation will
- // be legal.
- if (!LegalOperations ||
- (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
- TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
- (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
- TLI.isOperationLegal(ISD::Constant, VT)))
- return DAG.getBitcast(VT, N0);
- }
+  // We always need to check that this is just a fp -> int or int -> fp
+  // conversion, otherwise we will get back N, which will confuse the caller
+  // into thinking we used CombineTo. This can block target combines from
+  // running. If we can't allow illegal operations, we need to ensure the
+  // resulting operation will be legal.
+ // TODO: Maybe we should check that the return value isn't N explicitly?
+ if ((isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::ConstantFP, VT))) ||
+ (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::Constant, VT))))
+ return DAG.getBitcast(VT, N0);
// (conv (conv x, t1), t2) -> (conv x, t2)
if (N0.getOpcode() == ISD::BITCAST)
@@ -9238,7 +9954,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
static bool isContractable(SDNode *N) {
SDNodeFlags F = N->getFlags();
- return F.hasAllowContract() || F.hasUnsafeAlgebra();
+ return F.hasAllowContract() || F.hasAllowReassociation();
}
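
Why contraction is gated on these flags, shown with a small standalone program (not from the patch): fma rounds once where mul+add rounds twice, so fusing can change results in the last ulp.

#include <cmath>
#include <cstdio>

int main() {
  double x = 1.0 + 0x1p-27, y = 1.0 - 0x1p-27, z = -1.0;
  double separate = x * y + z;      // product rounded, then sum rounded
  double fused = std::fma(x, y, z); // x*y+z with a single rounding
  std::printf("%a vs %a\n", separate, fused); // 0x0p+0 vs -0x1p-54
}
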
/// Try to perform FMA combining on a given FADD node.
@@ -9262,8 +9978,10 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (!HasFMAD && !HasFMA)
return SDValue();
+ SDNodeFlags Flags = N->getFlags();
+ bool CanFuse = Options.UnsafeFPMath || isContractable(N);
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
- Options.UnsafeFPMath || HasFMAD);
+ CanFuse || HasFMAD);
// If the addition is not contractable, do not combine.
if (!AllowFusionGlobally && !isContractable(N))
return SDValue();
@@ -9293,14 +10011,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(0), N0.getOperand(1), N1);
+ N0.getOperand(0), N0.getOperand(1), N1, Flags);
}
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.
if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N1.getOperand(0), N1.getOperand(1), N0);
+ N1.getOperand(0), N1.getOperand(1), N0, Flags);
}
// Look through FP_EXTEND nodes to do more combining.
@@ -9314,7 +10032,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(1)), N1);
+ N00.getOperand(1)), N1, Flags);
}
}
@@ -9328,16 +10046,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N10.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(1)), N0);
+ N10.getOperand(1)), N0, Flags);
}
}
// More folding opportunities when target permits.
if (Aggressive) {
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
- // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
- // are currently only supported on binary nodes.
- if (Options.UnsafeFPMath &&
+ if (CanFuse &&
N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL &&
N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
@@ -9346,13 +10062,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
- N1));
+ N1, Flags), Flags);
}
// fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
- // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
- // are currently only supported on binary nodes.
- if (Options.UnsafeFPMath &&
+ if (CanFuse &&
N1->getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL &&
N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
@@ -9361,19 +10075,20 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(2).getOperand(0),
N1.getOperand(2).getOperand(1),
- N0));
+ N0, Flags), Flags);
}
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), z))
auto FoldFAddFMAFPExtFMul = [&] (
- SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
+ SDNodeFlags Flags) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
- Z));
+ Z, Flags), Flags);
};
if (N0.getOpcode() == PreferredFusedOpcode) {
SDValue N02 = N0.getOperand(2);
@@ -9383,7 +10098,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
N020.getOperand(0), N020.getOperand(1),
- N1);
+ N1, Flags);
}
}
}
@@ -9394,14 +10109,15 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
auto FoldFAddFPExtFMAFMul = [&] (
- SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
+ SDNodeFlags Flags) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
- Z));
+ Z, Flags), Flags);
};
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
@@ -9411,7 +10127,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
N002.getOperand(0), N002.getOperand(1),
- N1);
+ N1, Flags);
}
}
}
@@ -9426,7 +10142,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
N120.getOperand(0), N120.getOperand(1),
- N0);
+ N0, Flags);
}
}
}
@@ -9444,7 +10160,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
N102.getOperand(0), N102.getOperand(1),
- N0);
+ N0, Flags);
}
}
}
@@ -9473,8 +10189,11 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (!HasFMAD && !HasFMA)
return SDValue();
+ const SDNodeFlags Flags = N->getFlags();
+ bool CanFuse = Options.UnsafeFPMath || isContractable(N);
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
- Options.UnsafeFPMath || HasFMAD);
+ CanFuse || HasFMAD);
+
// If the subtraction is not contractable, do not combine.
if (!AllowFusionGlobally && !isContractable(N))
return SDValue();
@@ -9499,16 +10218,17 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FNEG, SL, VT, N1));
+ DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
}
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
// Note: Commutes FSUB operands.
- if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse()))
+ if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
N1.getOperand(0)),
- N1.getOperand(1), N0);
+ N1.getOperand(1), N0, Flags);
+ }
// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
@@ -9517,7 +10237,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N01 = N0.getOperand(0).getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
- DAG.getNode(ISD::FNEG, SL, VT, N1));
+ DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
}
// Look through FP_EXTEND nodes to do more combining.
@@ -9533,7 +10253,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N00.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT, N1));
+ DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
}
}
@@ -9550,7 +10270,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N10.getOperand(0))),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N10.getOperand(1)),
- N0);
+ N0, Flags);
}
}
@@ -9572,7 +10292,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N000.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N000.getOperand(1)),
- N1));
+ N1, Flags));
}
}
}
@@ -9595,7 +10315,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N000.getOperand(0)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N000.getOperand(1)),
- N1));
+ N1, Flags));
}
}
}
@@ -9604,9 +10324,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (Aggressive) {
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))
- // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
- // are currently only supported on binary nodes.
- if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
+ if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -9615,14 +10333,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
DAG.getNode(ISD::FNEG, SL, VT,
- N1)));
+ N1), Flags), Flags);
}
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
- // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
- // are currently only supported on binary nodes.
- if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
+ if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
isContractableFMUL(N1.getOperand(2))) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
@@ -9632,8 +10348,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N1.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N20),
-
- N21, N0));
+ N21, N0, Flags), Flags);
}
@@ -9653,7 +10368,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N020.getOperand(1)),
DAG.getNode(ISD::FNEG, SL, VT,
- N1)));
+ N1), Flags), Flags);
}
}
}
@@ -9681,7 +10396,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N002.getOperand(1)),
DAG.getNode(ISD::FNEG, SL, VT,
- N1)));
+ N1), Flags), Flags);
}
}
}
@@ -9704,7 +10419,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
VT, N1200)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N1201),
- N0));
+ N0, Flags), Flags);
}
}
@@ -9735,7 +10450,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
VT, N1020)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N1021),
- N0));
+ N0, Flags), Flags);
}
}
}
@@ -9751,6 +10466,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc SL(N);
+ const SDNodeFlags Flags = N->getFlags();
assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
@@ -9782,52 +10498,54 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
// fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
// fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
- auto FuseFADD = [&](SDValue X, SDValue Y) {
+ auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
if (XC1 && XC1->isExactlyValue(+1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ Y, Flags);
if (XC1 && XC1->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y));
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
}
return SDValue();
};
- if (SDValue FMA = FuseFADD(N0, N1))
+ if (SDValue FMA = FuseFADD(N0, N1, Flags))
return FMA;
- if (SDValue FMA = FuseFADD(N1, N0))
+ if (SDValue FMA = FuseFADD(N1, N0, Flags))
return FMA;
// fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
// fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
// fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
// fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
- auto FuseFSUB = [&](SDValue X, SDValue Y) {
+ auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
if (XC0 && XC0->isExactlyValue(+1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
- Y);
+ Y, Flags);
if (XC0 && XC0->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y));
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
if (XC1 && XC1->isExactlyValue(+1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y));
+ DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
if (XC1 && XC1->isExactlyValue(-1.0))
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ Y, Flags);
}
return SDValue();
};
- if (SDValue FMA = FuseFSUB(N0, N1))
+ if (SDValue FMA = FuseFSUB(N0, N1, Flags))
return FMA;
- if (SDValue FMA = FuseFSUB(N1, N0))
+ if (SDValue FMA = FuseFSUB(N1, N0, Flags))
return FMA;
return SDValue();
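
The FuseFADD/FuseFSUB folds above are distribution identities: for example (x + 1.0) * y == x*y + y, which maps onto a single fma(x, y, y). A hedged scalar restatement (the two forms round differently, which is why the fold stays behind the contraction checks):

#include <cmath>
double viaMul(double x, double y) { return (x + 1.0) * y; }     // fmul (fadd x, 1.0), y
double viaFma(double x, double y) { return std::fma(x, y, y); } // fma x, y, y
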
@@ -9889,35 +10607,42 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
}
- // FIXME: Auto-upgrade the target/function-level option.
- if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
- // fold (fadd A, 0) -> A
- if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
- if (N1C->isZero())
- return N0;
+ ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1);
+ if (N1C && N1C->isZero()) {
+ if (N1C->isNegative() || Options.UnsafeFPMath ||
+ Flags.hasNoSignedZeros()) {
+ // fold (fadd A, 0) -> A
+ return N0;
+ }
}
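
The isNegative() special case is IEEE-754: x + (-0.0) == x for every x, while x + (+0.0) turns -0.0 into +0.0, so only the +0.0 case needs nsz (or unsafe math). A quick standalone check:

#include <cstdio>
int main() {
  float a = -0.0f;
  std::printf("%g\n", a + 0.0f);  // prints 0: (-0.0) + (+0.0) == +0.0
  std::printf("%g\n", a + -0.0f); // prints -0: x + (-0.0) == x for all x
}
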
- // If 'unsafe math' is enabled, fold lots of things.
- if (Options.UnsafeFPMath) {
- // No FP constant should be created after legalization as Instruction
- // Selection pass has a hard time dealing with FP constants.
- bool AllowNewConst = (Level < AfterLegalizeDAG);
-
- // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
- if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
- isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
- return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
- DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
- Flags),
- Flags);
+ // No FP constant should be created after legalization as Instruction
+ // Selection pass has a hard time dealing with FP constants.
+ bool AllowNewConst = (Level < AfterLegalizeDAG);
+ // If 'unsafe math' or nnan is enabled, fold lots of things.
+ if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
// If allowed, fold (fadd (fneg x), x) -> 0.0
- if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
+ if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
return DAG.getConstantFP(0.0, DL, VT);
// If allowed, fold (fadd x, (fneg x)) -> 0.0
- if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
+ if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
return DAG.getConstantFP(0.0, DL, VT);
+ }
+
+ // If 'unsafe math' or reassoc and nsz, fold lots of things.
+ // TODO: break out portions of the transformations below for which Unsafe is
+ // considered and which do not require both nsz and reassoc
+ if ((Options.UnsafeFPMath ||
+ (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
+ AllowNewConst) {
+ // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
+ if (N1CFP && N0.getOpcode() == ISD::FADD &&
+ isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
+ SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
+ return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
+ }
// We can fold chains of FADD's of the same value into multiplications.
// This transform is not safe in general because we are reducing the number
@@ -9965,7 +10690,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
}
- if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
+ if (N0.getOpcode() == ISD::FADD) {
bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul x, 3.0)
if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
@@ -9975,7 +10700,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
}
- if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
+ if (N1.getOpcode() == ISD::FADD) {
bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
@@ -9986,8 +10711,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
// (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
- if (AllowNewConst &&
- N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
+ if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
@@ -10027,15 +10751,23 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- // fold (fsub A, (fneg B)) -> (fadd A, B)
- if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
- return DAG.getNode(ISD::FADD, DL, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations), Flags);
+ // (fsub A, 0) -> A
+ if (N1CFP && N1CFP->isZero()) {
+ if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
+ Flags.hasNoSignedZeros()) {
+ return N0;
+ }
+ }
+
+ if (N0 == N1) {
+ // (fsub x, x) -> 0.0
+ if (Options.UnsafeFPMath || Flags.hasNoNaNs())
+ return DAG.getConstantFP(0.0f, DL, VT);
+ }
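
The nnan requirement exists because x - x is NaN, not 0.0, whenever x is a NaN or an infinity, as this standalone snippet shows:

#include <cstdio>
#include <limits>
int main() {
  double inf = std::numeric_limits<double>::infinity();
  double nan = std::numeric_limits<double>::quiet_NaN();
  std::printf("%f %f\n", inf - inf, nan - nan); // nan nan, never 0.0
}
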
- // FIXME: Auto-upgrade the target/function-level option.
- if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
- // (fsub 0, B) -> -B
- if (N0CFP && N0CFP->isZero()) {
+ // (fsub 0, B) -> -B
+ if (N0CFP && N0CFP->isZero()) {
+ if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
@@ -10043,16 +10775,13 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
}
+ // fold (fsub A, (fneg B)) -> (fadd A, B)
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
+ return DAG.getNode(ISD::FADD, DL, VT, N0,
+ GetNegatedExpression(N1, DAG, LegalOperations), Flags);
+
// If 'unsafe math' is enabled, fold lots of things.
if (Options.UnsafeFPMath) {
- // (fsub A, 0) -> A
- if (N1CFP && N1CFP->isZero())
- return N0;
-
- // (fsub x, x) -> 0.0
- if (N0 == N1)
- return DAG.getConstantFP(0.0f, DL, VT);
-
// (fsub x, (fadd x, y)) -> (fneg y)
// (fsub x, (fadd y, x)) -> (fneg y)
if (N1.getOpcode() == ISD::FADD) {
@@ -10109,12 +10838,15 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- if (Options.UnsafeFPMath) {
+ if (Options.UnsafeFPMath ||
+ (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
// fold (fmul A, 0) -> 0
if (N1CFP && N1CFP->isZero())
return N1;
+ }
- // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
+ if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
+ // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
if (N0.getOpcode() == ISD::FMUL) {
// Fold scalars or any vector constants (not just splats).
// This fold is done in general by InstCombine, but extra fmul insts
@@ -10138,13 +10870,10 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
}
}
- // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
- // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
- // during an early run of DAGCombiner can prevent folding with fmuls
- // inserted during lowering.
- if (N0.getOpcode() == ISD::FADD &&
- (N0.getOperand(0) == N0.getOperand(1)) &&
- N0.hasOneUse()) {
+    // Match a special case: we convert X * 2.0 into fadd.
+ // fmul (fadd X, X), C -> fmul X, 2.0 * C
+ if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
+ N0.getOperand(0) == N0.getOperand(1)) {
const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
@@ -10238,6 +10967,10 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
+ // FMA nodes have flags that propagate to the created nodes.
+ const SDNodeFlags Flags = N->getFlags();
+ bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
+
// Constant fold FMA.
if (isa<ConstantFPSDNode>(N0) &&
isa<ConstantFPSDNode>(N1) &&
@@ -10245,7 +10978,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
}
- if (Options.UnsafeFPMath) {
+ if (UnsafeFPMath) {
if (N0CFP && N0CFP->isZero())
return N2;
if (N1CFP && N1CFP->isZero())
@@ -10262,12 +10995,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
!isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
- // TODO: FMA nodes should have flags that propagate to the created nodes.
- // For now, create a Flags object for use with all unsafe math transforms.
- SDNodeFlags Flags;
- Flags.setUnsafeAlgebra(true);
-
- if (Options.UnsafeFPMath) {
+ if (UnsafeFPMath) {
// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
isConstantFPBuildVectorOrConstantFP(N1) &&
@@ -10313,7 +11041,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
}
}
- if (Options.UnsafeFPMath) {
+ if (UnsafeFPMath) {
// (fma x, c, x) -> (fmul x, (c+1))
if (N1CFP && N0 == N2) {
return DAG.getNode(ISD::FMUL, DL, VT, N0,
@@ -10420,7 +11148,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- if (Options.UnsafeFPMath) {
+ if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
if (N1CFP) {
// Compute the reciprocal 1.0 / c2.
@@ -10529,17 +11257,16 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
}
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
- if (!DAG.getTarget().Options.UnsafeFPMath)
+ SDNodeFlags Flags = N->getFlags();
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
+ !Flags.hasApproximateFuncs())
return SDValue();
SDValue N0 = N->getOperand(0);
if (TLI.isFsqrtCheap(N0, DAG))
return SDValue();
- // TODO: FSQRT nodes should have flags that propagate to the created nodes.
- // For now, create a Flags object for use with all unsafe math transforms.
- SDNodeFlags Flags;
- Flags.setUnsafeAlgebra(true);
+ // FSQRT nodes have flags that propagate to the created nodes.
return buildSqrtEstimate(N0, Flags);
}
@@ -10607,6 +11334,41 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
return SDValue();
}
+static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ // This optimization is guarded by a function attribute because it may produce
+  // unexpected results. I.e., programs may be relying on the platform-specific
+ // undefined behavior when the float-to-int conversion overflows.
+ const Function &F = DAG.getMachineFunction().getFunction();
+ Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
+ if (StrictOverflow.getValueAsString().equals("false"))
+ return SDValue();
+
+ // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
+ // replacing casts with a libcall. We also must be allowed to ignore -0.0
+ // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
+ // conversions would return +0.0.
+ // FIXME: We should be able to use node-level FMF here.
+ // TODO: If strict math, should we use FABS (+ range check for signed cast)?
+ EVT VT = N->getValueType(0);
+ if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
+ !DAG.getTarget().Options.NoSignedZerosFPMath)
+ return SDValue();
+
+ // fptosi/fptoui round towards zero, so converting from FP to integer and
+ // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
+ SDValue N0 = N->getOperand(0);
+ if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
+ N0.getOperand(0).getValueType() == VT)
+ return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
+
+ if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
+ N0.getOperand(0).getValueType() == VT)
+ return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
+
+ return SDValue();
+}
+
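
A hedged demonstration of the round-trip identity and its -0.0 corner case (standalone program, not from the patch; out-of-range inputs are UB for the casts, which is what the "strict-float-cast-overflow" attribute guards):

#include <cmath>
#include <cstdio>
int main() {
  for (double x : {2.7, -2.7, -0.3}) {
    double roundtrip = (double)(long long)x; // fptosi then sitofp
    std::printf("%g: trunc=%g roundtrip=%g\n", x, std::trunc(x), roundtrip);
  }
  // For -0.3, trunc gives -0.0 but the integer round-trip gives +0.0,
  // which is why the combine also requires NoSignedZerosFPMath.
}
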
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -10658,6 +11420,9 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
}
}
+ if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
+ return FTrunc;
+
return SDValue();
}
@@ -10697,6 +11462,9 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
}
}
+ if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
+ return FTrunc;
+
return SDValue();
}
@@ -11103,16 +11871,22 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
N1.getOperand(0), N1.getOperand(1), N2);
}
- if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
- ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
- (N1.getOperand(0).hasOneUse() &&
- N1.getOperand(0).getOpcode() == ISD::SRL))) {
- SDNode *Trunc = nullptr;
- if (N1.getOpcode() == ISD::TRUNCATE) {
- // Look pass the truncate.
- Trunc = N1.getNode();
- N1 = N1.getOperand(0);
- }
+ if (N1.hasOneUse()) {
+ if (SDValue NewN1 = rebuildSetCC(N1))
+ return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::rebuildSetCC(SDValue N) {
+ if (N.getOpcode() == ISD::SRL ||
+ (N.getOpcode() == ISD::TRUNCATE &&
+ (N.getOperand(0).hasOneUse() &&
+ N.getOperand(0).getOpcode() == ISD::SRL))) {
+    // Look past the truncate.
+ if (N.getOpcode() == ISD::TRUNCATE)
+ N = N.getOperand(0);
// Match this pattern so that we can generate simpler code:
//
@@ -11131,74 +11905,55 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
// This applies only when the AND constant value has one bit set and the
// SRL constant is equal to the log2 of the AND constant. The back-end is
// smart enough to convert the result into a TEST/JMP sequence.
- SDValue Op0 = N1.getOperand(0);
- SDValue Op1 = N1.getOperand(1);
+ SDValue Op0 = N.getOperand(0);
+ SDValue Op1 = N.getOperand(1);
- if (Op0.getOpcode() == ISD::AND &&
- Op1.getOpcode() == ISD::Constant) {
+ if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
SDValue AndOp1 = Op0.getOperand(1);
if (AndOp1.getOpcode() == ISD::Constant) {
const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
if (AndConst.isPowerOf2() &&
- cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
+ cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
SDLoc DL(N);
- SDValue SetCC =
- DAG.getSetCC(DL,
- getSetCCResultType(Op0.getValueType()),
- Op0, DAG.getConstant(0, DL, Op0.getValueType()),
- ISD::SETNE);
-
- SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
- MVT::Other, Chain, SetCC, N2);
- // Don't add the new BRCond into the worklist or else SimplifySelectCC
- // will convert it back to (X & C1) >> C2.
- CombineTo(N, NewBRCond, false);
- // Truncate is dead.
- if (Trunc)
- deleteAndRecombine(Trunc);
- // Replace the uses of SRL with SETCC
- WorklistRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
- deleteAndRecombine(N1.getNode());
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
+ Op0, DAG.getConstant(0, DL, Op0.getValueType()),
+ ISD::SETNE);
}
}
}
-
- if (Trunc)
- // Restore N1 if the above transformation doesn't match.
- N1 = N->getOperand(1);
}
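
Hypothetical source producing the SRL-of-AND condition handled above (identifiers invented): branching on a bit extracted by shifting. rebuildSetCC rewrites the (srl (and x, 8), 3) condition as setne (and x, 8), 0, which most backends lower to test+jne.

int branchOnBit3(unsigned x) {
  if ((x & 8) >> 3) // condition is (srl (and x, 8), 3)
    return 1;
  return 0;
}
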
// Transform br(xor(x, y)) -> br(x != y)
// Transform br(xor(xor(x,y), 1)) -> br (x == y)
- if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
- SDNode *TheXor = N1.getNode();
+ if (N.getOpcode() == ISD::XOR) {
+ // Because we may call this on a speculatively constructed
+ // SimplifiedSetCC Node, we need to simplify this node first.
+ // Ideally this should be folded into SimplifySetCC and not
+ // here. For now, grab a handle to N so we don't lose it from
+    // replacements internal to the visit.
+ HandleSDNode XORHandle(N);
+ while (N.getOpcode() == ISD::XOR) {
+ SDValue Tmp = visitXOR(N.getNode());
+ // No simplification done.
+ if (!Tmp.getNode())
+ break;
+      // Returning N is a form of in-visit replacement that may invalidate
+      // N. Grab the value from the handle.
+ if (Tmp.getNode() == N.getNode())
+ N = XORHandle.getValue();
+ else // Node simplified. Try simplifying again.
+ N = Tmp;
+ }
+
+ if (N.getOpcode() != ISD::XOR)
+ return N;
+
+ SDNode *TheXor = N.getNode();
+
SDValue Op0 = TheXor->getOperand(0);
SDValue Op1 = TheXor->getOperand(1);
- if (Op0.getOpcode() == Op1.getOpcode()) {
- // Avoid missing important xor optimizations.
- if (SDValue Tmp = visitXOR(TheXor)) {
- if (Tmp.getNode() != TheXor) {
- DEBUG(dbgs() << "\nReplacing.8 ";
- TheXor->dump(&DAG);
- dbgs() << "\nWith: ";
- Tmp.getNode()->dump(&DAG);
- dbgs() << '\n');
- WorklistRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
- deleteAndRecombine(TheXor);
- return DAG.getNode(ISD::BRCOND, SDLoc(N),
- MVT::Other, Chain, Tmp, N2);
- }
-
- // visitXOR has changed XOR's operands or replaced the XOR completely,
- // bail out.
- return SDValue(N, 0);
- }
- }
if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
bool Equal = false;
@@ -11208,19 +11963,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
Equal = true;
}
- EVT SetCCVT = N1.getValueType();
+ EVT SetCCVT = N.getValueType();
if (LegalTypes)
SetCCVT = getSetCCResultType(SetCCVT);
- SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
- SetCCVT,
- Op0, Op1,
- Equal ? ISD::SETEQ : ISD::SETNE);
// Replace the uses of XOR with SETCC
- WorklistRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
- deleteAndRecombine(N1.getNode());
- return DAG.getNode(ISD::BRCOND, SDLoc(N),
- MVT::Other, Chain, SetCC, N2);
+ return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
+ Equal ? ISD::SETEQ : ISD::SETNE);
}
}
@@ -11452,11 +12200,8 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
BasePtr, Offset, AM);
++PreIndexedNodes;
++NodesCombined;
- DEBUG(dbgs() << "\nReplacing.4 ";
- N->dump(&DAG);
- dbgs() << "\nWith: ";
- Result.getNode()->dump(&DAG);
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
+ Result.getNode()->dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
if (isLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
@@ -11621,11 +12366,9 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
BasePtr, Offset, AM);
++PostIndexedNodes;
++NodesCombined;
- DEBUG(dbgs() << "\nReplacing.5 ";
- N->dump(&DAG);
- dbgs() << "\nWith: ";
- Result.getNode()->dump(&DAG);
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
+ dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
WorklistRemover DeadNodes(*this);
if (isLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
@@ -11649,7 +12392,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
return false;
}
-/// \brief Return the base-pointer arithmetic from an indexed \p LD.
+/// Return the base-pointer arithmetic from an indexed \p LD.
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
ISD::MemIndexedMode AM = LD->getAddressingMode();
assert(AM != ISD::UNINDEXED);
@@ -11691,11 +12434,9 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// v3 = add v2, c
// Now we replace use of chain2 with chain1. This makes the second load
// isomorphic to the one we are deleting, and thus makes this load live.
- DEBUG(dbgs() << "\nReplacing.6 ";
- N->dump(&DAG);
- dbgs() << "\nWith chain: ";
- Chain.getNode()->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
+ dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
+ dbgs() << "\n");
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
AddUsersToWorklist(Chain.getNode());
@@ -11726,11 +12467,9 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
AddUsersToWorklist(N);
} else
Index = DAG.getUNDEF(N->getValueType(1));
- DEBUG(dbgs() << "\nReplacing.7 ";
- N->dump(&DAG);
- dbgs() << "\nWith: ";
- Undef.getNode()->dump(&DAG);
- dbgs() << " and 2 other values\n");
+ LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
+ dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
+ dbgs() << " and 2 other values\n");
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
@@ -11758,13 +12497,14 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// Try to infer better alignment information than the load already has.
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
- if (Align > LD->getMemOperand()->getBaseAlignment()) {
+ if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
SDValue NewLoad = DAG.getExtLoad(
LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
LD->getPointerInfo(), LD->getMemoryVT(), Align,
LD->getMemOperand()->getFlags(), LD->getAAInfo());
- if (NewLoad.getNode() != N)
- return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
+ // NewLoad will always be N as we are only refining the alignment
+ assert(NewLoad.getNode() == N);
+ (void)NewLoad;
}
}
}
@@ -11811,7 +12551,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
namespace {
-/// \brief Helper structure used to slice a load in smaller loads.
+/// Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
@@ -11824,7 +12564,7 @@ namespace {
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
struct LoadedSlice {
- /// \brief Helper structure used to compute the cost of a slice.
+ /// Helper structure used to compute the cost of a slice.
struct Cost {
/// Are we optimizing for code size.
bool ForCodeSize;
@@ -11838,7 +12578,7 @@ struct LoadedSlice {
Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
- /// \brief Get the cost of one isolated slice.
+ /// Get the cost of one isolated slice.
Cost(const LoadedSlice &LS, bool ForCodeSize = false)
: ForCodeSize(ForCodeSize), Loads(1) {
EVT TruncType = LS.Inst->getValueType(0);
@@ -11848,7 +12588,7 @@ struct LoadedSlice {
ZExts = 1;
}
- /// \brief Account for slicing gain in the current cost.
+ /// Account for slicing gain in the current cost.
/// Slicing provide a few gains like removing a shift or a
/// truncate. This method allows to grow the cost of the original
/// load with the gain from this slice.
@@ -11921,7 +12661,7 @@ struct LoadedSlice {
unsigned Shift = 0, SelectionDAG *DAG = nullptr)
: Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
- /// \brief Get the bits used in a chunk of bits \p BitWidth large.
+ /// Get the bits used in a chunk of bits \p BitWidth large.
/// \return Result is \p BitWidth and has used bits set to 1 and
/// not used bits set to 0.
APInt getUsedBits() const {
@@ -11941,14 +12681,14 @@ struct LoadedSlice {
return UsedBits;
}
- /// \brief Get the size of the slice to be loaded in bytes.
+ /// Get the size of the slice to be loaded in bytes.
unsigned getLoadedSize() const {
unsigned SliceSize = getUsedBits().countPopulation();
assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
return SliceSize / 8;
}
- /// \brief Get the type that will be loaded for this slice.
+ /// Get the type that will be loaded for this slice.
/// Note: This may not be the final type for the slice.
EVT getLoadedType() const {
assert(DAG && "Missing context");
@@ -11956,7 +12696,7 @@ struct LoadedSlice {
return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
}
- /// \brief Get the alignment of the load used for this slice.
+ /// Get the alignment of the load used for this slice.
unsigned getAlignment() const {
unsigned Alignment = Origin->getAlignment();
unsigned Offset = getOffsetFromBase();
@@ -11965,7 +12705,7 @@ struct LoadedSlice {
return Alignment;
}
- /// \brief Check if this slice can be rewritten with legal operations.
+ /// Check if this slice can be rewritten with legal operations.
bool isLegal() const {
// An invalid slice is not legal.
if (!Origin || !Inst || !DAG)
@@ -12009,7 +12749,7 @@ struct LoadedSlice {
return true;
}
- /// \brief Get the offset in bytes of this slice in the original chunk of
+ /// Get the offset in bytes of this slice in the original chunk of
/// bits.
/// \pre DAG != nullptr.
uint64_t getOffsetFromBase() const {
@@ -12030,7 +12770,7 @@ struct LoadedSlice {
return Offset;
}
- /// \brief Generate the sequence of instructions to load the slice
+ /// Generate the sequence of instructions to load the slice
/// represented by this object and redirect the uses of this slice to
/// this new sequence of instructions.
/// \pre this->Inst && this->Origin are valid Instructions and this
@@ -12068,7 +12808,7 @@ struct LoadedSlice {
return LastInst;
}
- /// \brief Check if this slice can be merged with an expensive cross register
+ /// Check if this slice can be merged with an expensive cross register
/// bank copy. E.g.,
/// i = load i32
/// f = bitcast i32 i to float
@@ -12117,7 +12857,7 @@ struct LoadedSlice {
} // end anonymous namespace
-/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
+/// Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
static bool areUsedBitsDense(const APInt &UsedBits) {
// If all the bits are one, this is dense!
@@ -12133,7 +12873,7 @@ static bool areUsedBitsDense(const APInt &UsedBits) {
return NarrowedUsedBits.isAllOnesValue();
}
-/// \brief Check whether or not \p First and \p Second are next to each other
+/// Check whether or not \p First and \p Second are next to each other
/// in memory. This means that there is no hole between the bits loaded
/// by \p First and the bits loaded by \p Second.
static bool areSlicesNextToEachOther(const LoadedSlice &First,
@@ -12147,7 +12887,7 @@ static bool areSlicesNextToEachOther(const LoadedSlice &First,
return areUsedBitsDense(UsedBits);
}
-/// \brief Adjust the \p GlobalLSCost according to the target
+/// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there is in the slices in \p LoadedSlices.
@@ -12160,8 +12900,8 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
// Sort the slices so that elements that are likely to be next to each
// other in memory are next to each other in the list.
- std::sort(LoadedSlices.begin(), LoadedSlices.end(),
- [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
+ llvm::sort(LoadedSlices.begin(), LoadedSlices.end(),
+ [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
});
@@ -12208,7 +12948,7 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
}
}
-/// \brief Check the profitability of all involved LoadedSlice.
+/// Check the profitability of all involved LoadedSlice.
/// Currently, it is considered profitable if there is exactly two
/// involved slices (1) which are (2) next to each other in memory, and
/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
@@ -12252,7 +12992,7 @@ static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
return OrigCost > GlobalSlicingCost;
}
-/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
+/// If the given load, \p LI, is used only by trunc or trunc(lshr)
/// operations, split it in the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
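
The access shape in question, as a hedged C++ sketch: SROA commonly leaves one wide load consumed only through trunc and trunc(lshr), and slicing rewrites it as two narrow loads when the Cost model below agrees.

#include <cstdint>
void split(const uint64_t *p, uint32_t &lo, uint32_t &hi) {
  uint64_t v = *p;          // Origin = load i64
  lo = (uint32_t)v;         // trunc i64 -> i32
  hi = (uint32_t)(v >> 32); // trunc (lshr i64 v, 32)
}
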
@@ -12371,22 +13111,6 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
- // The store should be chained directly to the load or be an operand of a
- // tokenfactor.
- if (LD == Chain.getNode())
- ; // ok.
- else if (Chain->getOpcode() != ISD::TokenFactor)
- return Result; // Fail.
- else {
- bool isOk = false;
- for (const SDValue &ChainOp : Chain->op_values())
- if (ChainOp.getNode() == LD) {
- isOk = true;
- break;
- }
- if (!isOk) return Result;
- }
-
// This only handles simple types.
if (V.getValueType() != MVT::i16 &&
V.getValueType() != MVT::i32 &&
@@ -12423,6 +13147,24 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
// is aligned the same as the access width.
if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
+  // For narrowing to be valid, it must be the case that the load is the
+  // immediately preceding memory operation before the store.
+ if (LD == Chain.getNode())
+ ; // ok.
+ else if (Chain->getOpcode() == ISD::TokenFactor &&
+ SDValue(LD, 1).hasOneUse()) {
+ // LD has only 1 chain use so they are no indirect dependencies.
+ bool isOk = false;
+ for (const SDValue &ChainOp : Chain->op_values())
+ if (ChainOp.getNode() == LD) {
+ isOk = true;
+ break;
+ }
+ if (!isOk)
+ return Result;
+ } else
+ return Result; // Fail.
+
Result.first = MaskedBytes;
Result.second = NotMaskTZ/8;
return Result;
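
A hedged sketch of the load/or/and/store idiom this analysis serves: only the low 16 bits change, so the i32 store may be narrowed to an i16 store of v, provided the load really is the immediately preceding memory operation, per the chain check above.

#include <cstdint>
void setLow(uint32_t *p, uint16_t v) {
  *p = (*p & 0xFFFF0000u) | v; // store (or (and (load p), ~M), (zext v))
}
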
@@ -12741,12 +13483,6 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
return false;
}
-static SDValue peekThroughBitcast(SDValue V) {
- while (V.getOpcode() == ISD::BITCAST)
- V = V.getOperand(0);
- return V;
-}
-
SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumStores) {
SmallVector<SDValue, 8> Chains;
@@ -12871,6 +13607,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
SDValue Val = St->getValue();
+ Val = peekThroughBitcast(Val);
StoreInt <<= ElementSizeBits;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
StoreInt |= C->getAPIntValue()
@@ -12903,13 +13640,13 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
FirstInChain->getPointerInfo(),
FirstInChain->getAlignment());
} else { // Must be realized as a trunc store
- EVT LegalizedStoredValueTy =
+ EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
- unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits();
+ unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
SDValue ExtendedStoreVal =
DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
- LegalizedStoredValueTy);
+ LegalizedStoredValTy);
NewStore = DAG.getTruncStore(
NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
@@ -12926,10 +13663,11 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
}
void DAGCombiner::getStoreMergeCandidates(
- StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
+ StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
+ SDNode *&RootNode) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
EVT MemVT = St->getMemoryVT();
SDValue Val = peekThroughBitcast(St->getValue());
@@ -12950,11 +13688,17 @@ void DAGCombiner::getStoreMergeCandidates(
EVT LoadVT;
if (IsLoadSrc) {
auto *Ld = cast<LoadSDNode>(Val);
- LBasePtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
+ LBasePtr = BaseIndexOffset::match(Ld, DAG);
LoadVT = Ld->getMemoryVT();
// Load and store should be the same type.
if (MemVT != LoadVT)
return;
+ // Loads must only have one use.
+ if (!Ld->hasNUsesOfValue(1, 0))
+ return;
+ // The memory operands must not be volatile.
+ if (Ld->isVolatile() || Ld->isIndexed())
+ return;
}
auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
int64_t &Offset) -> bool {
@@ -12969,9 +13713,15 @@ void DAGCombiner::getStoreMergeCandidates(
return false;
// The Load's Base Ptr must also match
if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
- auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
+ auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
if (LoadVT != OtherLd->getMemoryVT())
return false;
+ // Loads must only have one use.
+ if (!OtherLd->hasNUsesOfValue(1, 0))
+ return false;
+ // The memory operands must not be volatile.
+ if (OtherLd->isVolatile() || OtherLd->isIndexed())
+ return false;
if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
return false;
} else
@@ -12993,7 +13743,7 @@ void DAGCombiner::getStoreMergeCandidates(
Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
}
- Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
+ Ptr = BaseIndexOffset::match(Other, DAG);
return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
};
@@ -13013,7 +13763,7 @@ void DAGCombiner::getStoreMergeCandidates(
// FIXME: We should be able to climb and
// descend TokenFactors to find candidates as well.
- SDNode *RootNode = (St->getChain()).getNode();
+ RootNode = St->getChain().getNode();
if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
RootNode = Ldn->getChain().getNode();
@@ -13044,31 +13794,54 @@ void DAGCombiner::getStoreMergeCandidates(
// through the chain). Check in parallel by searching up from
// non-chain operands of candidates.
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
- SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) {
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
+ SDNode *RootNode) {
// FIXME: We should be able to truncate a full search of
// predecessors by doing a BFS and keeping tabs on the originating
// stores from which worklist nodes come, in a similar way to
// TokenFactor simplification.
- SmallPtrSet<const SDNode *, 16> Visited;
+ SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 8> Worklist;
- unsigned int Max = 8192;
+
+ // RootNode is a predecessor to all candidates so we need not search
+ // past it. Add RootNode (peeking through TokenFactors). Do not count
+  // these towards the size check.
+
+ Worklist.push_back(RootNode);
+ while (!Worklist.empty()) {
+ auto N = Worklist.pop_back_val();
+ if (N->getOpcode() == ISD::TokenFactor) {
+ for (SDValue Op : N->ops())
+ Worklist.push_back(Op.getNode());
+ }
+ Visited.insert(N);
+ }
+
+ // Don't count pruning nodes towards max.
+ unsigned int Max = 1024 + Visited.size();
// Search Ops of store candidates.
for (unsigned i = 0; i < NumStores; ++i) {
- SDNode *n = StoreNodes[i].MemNode;
- // Potential loops may happen only through non-chain operands
- for (unsigned j = 1; j < n->getNumOperands(); ++j)
- Worklist.push_back(n->getOperand(j).getNode());
+ SDNode *N = StoreNodes[i].MemNode;
+ // Of the 4 Store Operands:
+ // * Chain (Op 0) -> We have already considered these
+ // in candidate selection and can be
+ // safely ignored
+ // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
+ // * Address (Op 2) -> Merged addresses may only vary by a fixed constant
+ // and so no cycles are possible.
+ // * (Op 3) -> appears to always be undef. Cannot be source of cycle.
+ //
+ // Thus we need only check predecessors of the value operands.
+ auto *Op = N->getOperand(1).getNode();
+ if (Visited.insert(Op).second)
+ Worklist.push_back(Op);
}
// Search through DAG. We can stop early if we find a store node.
- for (unsigned i = 0; i < NumStores; ++i) {
+ for (unsigned i = 0; i < NumStores; ++i)
if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
Max))
return false;
- // Check if we ended early, failing conservatively if so.
- if (Visited.size() >= Max)
- return false;
- }
return true;
}
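
For orientation, a hedged example of what MergeConsecutiveStores is after (subject to the legality, alignment, and cycle checks above): four adjacent byte stores of constants that can become one 32-bit store, with the merged constant's byte order following target endianness.

#include <cstdint>
void fill(uint8_t *p) {
  p[0] = 1; p[1] = 2; p[2] = 3; p[3] = 4; // mergeable into a single i32 store
}
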
@@ -13106,8 +13879,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
return false;
SmallVector<MemOpLink, 8> StoreNodes;
+ SDNode *RootNode;
// Find potential store merge candidates by searching through chain sub-DAG
- getStoreMergeCandidates(St, StoreNodes);
+ getStoreMergeCandidates(St, StoreNodes, RootNode);
// Check if there is anything to merge.
if (StoreNodes.size() < 2)
@@ -13115,10 +13889,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// Sort the memory operands according to their distance from the
// base pointer.
- std::sort(StoreNodes.begin(), StoreNodes.end(),
- [](MemOpLink LHS, MemOpLink RHS) {
- return LHS.OffsetFromBase < RHS.OffsetFromBase;
- });
+ llvm::sort(StoreNodes.begin(), StoreNodes.end(),
+ [](MemOpLink LHS, MemOpLink RHS) {
+ return LHS.OffsetFromBase < RHS.OffsetFromBase;
+ });
// Store Merge attempts to merge the lowest stores. This generally
// works out as if successful, as the remaining stores are checked
@@ -13162,178 +13936,191 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
continue;
}
- // Check that we can merge these candidates without causing a cycle
- if (!checkMergeStoreCandidatesForDependencies(StoreNodes,
- NumConsecutiveStores)) {
- StoreNodes.erase(StoreNodes.begin(),
- StoreNodes.begin() + NumConsecutiveStores);
- continue;
- }
-
// The node with the lowest store address.
LLVMContext &Context = *DAG.getContext();
const DataLayout &DL = DAG.getDataLayout();
// Store the constants into memory as one consecutive store.
if (IsConstantSrc) {
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
- unsigned LastLegalType = 1;
- unsigned LastLegalVectorType = 1;
- bool LastIntegerTrunc = false;
- bool NonZero = false;
- unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
- for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
- StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue StoredVal = ST->getValue();
- bool IsElementZero = false;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
- IsElementZero = C->isNullValue();
- else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
- IsElementZero = C->getConstantFPValue()->isNullValue();
- if (IsElementZero) {
- if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
- FirstZeroAfterNonZero = i;
- }
- NonZero |= !IsElementZero;
+ while (NumConsecutiveStores >= 2) {
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ unsigned LastLegalType = 1;
+ unsigned LastLegalVectorType = 1;
+ bool LastIntegerTrunc = false;
+ bool NonZero = false;
+ unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue StoredVal = ST->getValue();
+ bool IsElementZero = false;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
+ IsElementZero = C->isNullValue();
+ else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
+ IsElementZero = C->getConstantFPValue()->isNullValue();
+ if (IsElementZero) {
+ if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
+ FirstZeroAfterNonZero = i;
+ }
+ NonZero |= !IsElementZero;
- // Find a legal type for the constant store.
- unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
- EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
- bool IsFast = false;
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFast) &&
- IsFast) {
- LastIntegerTrunc = false;
- LastLegalType = i + 1;
- // Or check whether a truncstore is legal.
- } else if (TLI.getTypeAction(Context, StoreTy) ==
- TargetLowering::TypePromoteInteger) {
- EVT LegalizedStoredValueTy =
- TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
- if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
+ // Find a legal type for the constant store.
+ unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
+ EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ bool IsFast = false;
+
+ // Break early when size is too large to be legal.
+ if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
+ break;
+
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
FirstStoreAlign, &IsFast) &&
IsFast) {
- LastIntegerTrunc = true;
+ LastIntegerTrunc = false;
LastLegalType = i + 1;
+ // Or check whether a truncstore is legal.
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValTy =
+ TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
+ if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFast) &&
+ IsFast) {
+ LastIntegerTrunc = true;
+ LastLegalType = i + 1;
+ }
}
- }
- // We only use vectors if the constant is known to be zero or the target
- // allows it and the function is not marked with the noimplicitfloat
- // attribute.
- if ((!NonZero ||
- TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
- !NoVectors) {
- // Find a legal type for the vector store.
- unsigned Elts = (i + 1) * NumMemElts;
- EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
- if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
- TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
- FirstStoreAlign, &IsFast) &&
- IsFast)
- LastLegalVectorType = i + 1;
+ // We only use vectors if the constant is known to be zero or the
+ // target allows it and the function is not marked with the
+ // noimplicitfloat attribute.
+ if ((!NonZero ||
+ TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
+ !NoVectors) {
+ // Find a legal type for the vector store.
+ unsigned Elts = (i + 1) * NumMemElts;
+ EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+ if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
+ FirstStoreAlign, &IsFast) &&
+ IsFast)
+ LastLegalVectorType = i + 1;
+ }
}
- }
- bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
- unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
+ bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
+ unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
+
+ // Check if we found a legal integer type that creates a meaningful
+ // merge.
+ if (NumElem < 2) {
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have, is if the alignment has
+ // improved or we've dropped a non-zero value. Drop as many
+ // candidates as we can here.
+ unsigned NumSkip = 1;
+ while (
+ (NumSkip < NumConsecutiveStores) &&
+ (NumSkip < FirstZeroAfterNonZero) &&
+ (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ NumSkip++;
+
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+ NumConsecutiveStores -= NumSkip;
+ continue;
+ }
- // Check if we found a legal integer type that creates a meaningful merge.
- if (NumElem < 2) {
- // We know that candidate stores are in order and of correct
- // shape. While there is no mergeable sequence from the
- // beginning one may start later in the sequence. The only
- // reason a merge of size N could have failed where another of
- // the same size would not have, is if the alignment has
- // improved or we've dropped a non-zero value. Drop as many
- // candidates as we can here.
- unsigned NumSkip = 1;
- while (
- (NumSkip < NumConsecutiveStores) &&
- (NumSkip < FirstZeroAfterNonZero) &&
- (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) {
- NumSkip++;
+ // Check that we can merge these candidates without causing a cycle.
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
+ RootNode)) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ continue;
}
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
- continue;
- }
- bool Merged = MergeStoresOfConstantsOrVecElts(
- StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
- RV |= Merged;
+ RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
+ UseVector, LastIntegerTrunc);
- // Remove merged stores for next iteration.
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ // Remove merged stores for next iteration.
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ }
continue;
}
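The restructuring of the constant-source path above replaces a single pass over the front of the candidate list with a loop that keeps consuming prefixes until fewer than two stores remain. An outline of the control flow, as comments, with the inlined logic summarized under hypothetical names:

    // while (NumConsecutiveStores >= 2):
    //   NumElem = longest leading run with a legal (integer, truncating
    //             integer, or vector) store type, the scan stopping early
    //             once the width passes MaximumLegalStoreInBits;
    //   if NumElem < 2:  skip the NumSkip candidates that provably cannot
    //                    start a merge, then retry on the remainder;
    //   if merging NumElem stores would create a cycle:  drop them, retry;
    //   otherwise:  merge NumElem stores, drop them, retry on the rest.

Moving the cycle check after the size computation also means it now runs on the NumElem stores actually being merged rather than on the whole consecutive run.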
// When extracting multiple vector elements, try to store them
// in one vector store rather than a sequence of scalar stores.
if (IsExtractVecSrc) {
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
- unsigned NumStoresToMerge = 1;
- for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue StVal = peekThroughBitcast(St->getValue());
- // This restriction could be loosened.
- // Bail out if any stored values are not elements extracted from a
- // vector. It should be possible to handle mixed sources, but load
- // sources need more careful handling (see the block of code below that
- // handles consecutive loads).
- if (StVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
- StVal.getOpcode() != ISD::EXTRACT_SUBVECTOR)
- return RV;
+      // Loop over the consecutive stores while merges succeed.
+ while (NumConsecutiveStores >= 2) {
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ unsigned NumStoresToMerge = 1;
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ // Find a legal type for the vector store.
+ unsigned Elts = (i + 1) * NumMemElts;
+ EVT Ty =
+ EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
+ bool IsFast;
- // Find a legal type for the vector store.
- unsigned Elts = (i + 1) * NumMemElts;
- EVT Ty =
- EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
- bool IsFast;
- if (TLI.isTypeLegal(Ty) &&
- TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
- FirstStoreAlign, &IsFast) &&
- IsFast)
- NumStoresToMerge = i + 1;
- }
+ // Break early when size is too large to be legal.
+ if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
+ break;
- // Check if we found a legal integer type that creates a meaningful merge.
- if (NumStoresToMerge < 2) {
- // We know that candidate stores are in order and of correct
- // shape. While there is no mergeable sequence from the
- // beginning one may start later in the sequence. The only
- // reason a merge of size N could have failed where another of
- // the same size would not have, is if the alignment has
- // improved. Drop as many candidates as we can here.
- unsigned NumSkip = 1;
- while ((NumSkip < NumConsecutiveStores) &&
- (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
- NumSkip++;
+ if (TLI.isTypeLegal(Ty) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
+ FirstStoreAlign, &IsFast) &&
+ IsFast)
+ NumStoresToMerge = i + 1;
+ }
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
- continue;
- }
+        // Check if we found a legal vector type that creates a meaningful
+        // merge.
+ if (NumStoresToMerge < 2) {
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have, is if the alignment has
+ // improved. Drop as many candidates as we can here.
+ unsigned NumSkip = 1;
+ while (
+ (NumSkip < NumConsecutiveStores) &&
+ (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ NumSkip++;
+
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+ NumConsecutiveStores -= NumSkip;
+ continue;
+ }
+
+ // Check that we can merge these candidates without causing a cycle.
+ if (!checkMergeStoreCandidatesForDependencies(
+ StoreNodes, NumStoresToMerge, RootNode)) {
+ StoreNodes.erase(StoreNodes.begin(),
+ StoreNodes.begin() + NumStoresToMerge);
+ NumConsecutiveStores -= NumStoresToMerge;
+ continue;
+ }
+
+ RV |= MergeStoresOfConstantsOrVecElts(
+ StoreNodes, MemVT, NumStoresToMerge, false, true, false);
- bool Merged = MergeStoresOfConstantsOrVecElts(
- StoreNodes, MemVT, NumStoresToMerge, false, true, false);
- if (!Merged) {
StoreNodes.erase(StoreNodes.begin(),
StoreNodes.begin() + NumStoresToMerge);
- continue;
+ NumConsecutiveStores -= NumStoresToMerge;
}
- // Remove merged stores for next iteration.
- StoreNodes.erase(StoreNodes.begin(),
- StoreNodes.begin() + NumStoresToMerge);
- RV = true;
continue;
}
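All of the merge paths now break out of their candidate scans as soon as the combined width exceeds MaximumLegalStoreInBits, avoiding pointless legality and fast-memory-access queries for widths no target type can cover. The bound itself is precomputed once per combiner instance, outside this excerpt; assuming the constructor simply scans every simple value type, the computation is roughly:

    // Sketch, not the verbatim source: remember the widest type the
    // target can legalize, so candidate scans can stop once past it.
    unsigned MaximumLegalStoreInBits = 0;
    for (MVT VT : MVT::all_valuetypes())
      if (VT != MVT::Other && TLI.isTypeLegal(VT))
        MaximumLegalStoreInBits =
            std::max(MaximumLegalStoreInBits, unsigned(VT.getSizeInBits()));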
@@ -13347,26 +14134,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// Find acceptable loads. Loads need to have the same chain (token factor),
// must not be zext, volatile, indexed, and they must be consecutive.
BaseIndexOffset LdBasePtr;
+
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
SDValue Val = peekThroughBitcast(St->getValue());
- LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val);
- if (!Ld)
- break;
+ LoadSDNode *Ld = cast<LoadSDNode>(Val);
- // Loads must only have one use.
- if (!Ld->hasNUsesOfValue(1, 0))
- break;
-
- // The memory operands must not be volatile.
- if (Ld->isVolatile() || Ld->isIndexed())
- break;
-
- // The stored memory type must be the same.
- if (Ld->getMemoryVT() != MemVT)
- break;
-
- BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
+ BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
// If this is not the first ptr that we check.
int64_t LdOffset = 0;
if (LdBasePtr.getBase().getNode()) {
@@ -13382,90 +14156,75 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
LoadNodes.push_back(MemOpLink(Ld, LdOffset));
}
- if (LoadNodes.size() < 2) {
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
- continue;
- }
+ while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
+ // If we have load/store pair instructions and we only have two values,
+ // don't bother merging.
+ unsigned RequiredAlignment;
+ if (LoadNodes.size() == 2 &&
+ TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
+ StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
+ break;
+ }
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
+ unsigned FirstLoadAS = FirstLoad->getAddressSpace();
+ unsigned FirstLoadAlign = FirstLoad->getAlignment();
- // If we have load/store pair instructions and we only have two values,
- // don't bother merging.
- unsigned RequiredAlignment;
- if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
- StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
- continue;
- }
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
- LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
- unsigned FirstLoadAS = FirstLoad->getAddressSpace();
- unsigned FirstLoadAlign = FirstLoad->getAlignment();
+ // Scan the memory operations on the chain and find the first
+ // non-consecutive load memory address. These variables hold the index in
+ // the store node array.
- // Scan the memory operations on the chain and find the first
- // non-consecutive load memory address. These variables hold the index in
- // the store node array.
- unsigned LastConsecutiveLoad = 1;
- // This variable refers to the size and not index in the array.
- unsigned LastLegalVectorType = 1;
- unsigned LastLegalIntegerType = 1;
- bool isDereferenceable = true;
- bool DoIntegerTruncate = false;
- StartAddress = LoadNodes[0].OffsetFromBase;
- SDValue FirstChain = FirstLoad->getChain();
- for (unsigned i = 1; i < LoadNodes.size(); ++i) {
- // All loads must share the same chain.
- if (LoadNodes[i].MemNode->getChain() != FirstChain)
- break;
+ unsigned LastConsecutiveLoad = 1;
- int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
- if (CurrAddress - StartAddress != (ElementSizeBytes * i))
- break;
- LastConsecutiveLoad = i;
-
- if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
- isDereferenceable = false;
-
- // Find a legal type for the vector store.
- unsigned Elts = (i + 1) * NumMemElts;
- EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
-
- bool IsFastSt, IsFastLd;
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFastSt) &&
- IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
- FirstLoadAlign, &IsFastLd) &&
- IsFastLd) {
- LastLegalVectorType = i + 1;
- }
+ // This variable refers to the size and not index in the array.
+ unsigned LastLegalVectorType = 1;
+ unsigned LastLegalIntegerType = 1;
+ bool isDereferenceable = true;
+ bool DoIntegerTruncate = false;
+ StartAddress = LoadNodes[0].OffsetFromBase;
+ SDValue FirstChain = FirstLoad->getChain();
+ for (unsigned i = 1; i < LoadNodes.size(); ++i) {
+ // All loads must share the same chain.
+ if (LoadNodes[i].MemNode->getChain() != FirstChain)
+ break;
+
+ int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+ LastConsecutiveLoad = i;
- // Find a legal type for the integer store.
- unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
- StoreTy = EVT::getIntegerVT(Context, SizeInBits);
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFastSt) &&
- IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
- FirstLoadAlign, &IsFastLd) &&
- IsFastLd) {
- LastLegalIntegerType = i + 1;
- DoIntegerTruncate = false;
- // Or check whether a truncstore and extload is legal.
- } else if (TLI.getTypeAction(Context, StoreTy) ==
- TargetLowering::TypePromoteInteger) {
- EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
- if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy,
- StoreTy) &&
- TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
- StoreTy) &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
+ if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
+ isDereferenceable = false;
+
+ // Find a legal type for the vector store.
+ unsigned Elts = (i + 1) * NumMemElts;
+ EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+
+ // Break early when size is too large to be legal.
+ if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
+ break;
+
+ bool IsFastSt, IsFastLd;
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
+ FirstLoadAlign, &IsFastLd) &&
+ IsFastLd) {
+ LastLegalVectorType = i + 1;
+ }
+
+ // Find a legal type for the integer store.
+ unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
+ StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
FirstStoreAlign, &IsFastSt) &&
IsFastSt &&
@@ -13473,105 +14232,140 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
FirstLoadAlign, &IsFastLd) &&
IsFastLd) {
LastLegalIntegerType = i + 1;
- DoIntegerTruncate = true;
+ DoIntegerTruncate = false;
+ // Or check whether a truncstore and extload is legal.
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
+ if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
+ StoreTy) &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
+ StoreTy) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
+ FirstLoadAlign, &IsFastLd) &&
+ IsFastLd) {
+ LastLegalIntegerType = i + 1;
+ DoIntegerTruncate = true;
+ }
}
}
- }
- // Only use vector types if the vector type is larger than the integer type.
- // If they are the same, use integers.
- bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
- unsigned LastLegalType =
- std::max(LastLegalVectorType, LastLegalIntegerType);
-
- // We add +1 here because the LastXXX variables refer to location while
- // the NumElem refers to array/index size.
- unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
- NumElem = std::min(LastLegalType, NumElem);
-
- if (NumElem < 2) {
- // We know that candidate stores are in order and of correct
- // shape. While there is no mergeable sequence from the
- // beginning one may start later in the sequence. The only
- // reason a merge of size N could have failed where another of
- // the same size would not have is if the alignment or either
- // the load or store has improved. Drop as many candidates as we
- // can here.
- unsigned NumSkip = 1;
- while ((NumSkip < LoadNodes.size()) &&
- (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
- (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
- NumSkip++;
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
- continue;
- }
+ // Only use vector types if the vector type is larger than the integer
+ // type. If they are the same, use integers.
+ bool UseVectorTy =
+ LastLegalVectorType > LastLegalIntegerType && !NoVectors;
+ unsigned LastLegalType =
+ std::max(LastLegalVectorType, LastLegalIntegerType);
- // Find if it is better to use vectors or integers to load and store
- // to memory.
- EVT JointMemOpVT;
- if (UseVectorTy) {
- // Find a legal type for the vector store.
- unsigned Elts = NumElem * NumMemElts;
- JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
- } else {
- unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
- JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
- }
-
- SDLoc LoadDL(LoadNodes[0].MemNode);
- SDLoc StoreDL(StoreNodes[0].MemNode);
-
- // The merged loads are required to have the same incoming chain, so
- // using the first's chain is acceptable.
-
- SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
- AddToWorklist(NewStoreChain.getNode());
-
- MachineMemOperand::Flags MMOFlags = isDereferenceable ?
- MachineMemOperand::MODereferenceable:
- MachineMemOperand::MONone;
-
- SDValue NewLoad, NewStore;
- if (UseVectorTy || !DoIntegerTruncate) {
- NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
- FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(), FirstLoadAlign,
- MMOFlags);
- NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad,
- FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), FirstStoreAlign);
- } else { // This must be the truncstore/extload case
- EVT ExtendedTy =
- TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
- NewLoad =
- DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(),
- FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
- JointMemOpVT, FirstLoadAlign, MMOFlags);
- NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
- FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), JointMemOpVT,
- FirstInChain->getAlignment(),
- FirstInChain->getMemOperand()->getFlags());
- }
-
- // Transfer chain users from old loads to the new load.
- for (unsigned i = 0; i < NumElem; ++i) {
- LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
- SDValue(NewLoad.getNode(), 1));
- }
-
- // Replace the all stores with the new store. Recursively remove
- // corresponding value if its no longer used.
- for (unsigned i = 0; i < NumElem; ++i) {
- SDValue Val = StoreNodes[i].MemNode->getOperand(1);
- CombineTo(StoreNodes[i].MemNode, NewStore);
- if (Val.getNode()->use_empty())
- recursivelyDeleteUnusedNodes(Val.getNode());
- }
-
- RV = true;
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+      // We add +1 here because the LastXXX variables refer to the last
+      // valid position (an index) while NumElem refers to a count.
+ unsigned NumElem =
+ std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
+ NumElem = std::min(LastLegalType, NumElem);
+
+ if (NumElem < 2) {
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have is if the alignment or either
+ // the load or store has improved. Drop as many candidates as we
+ // can here.
+ unsigned NumSkip = 1;
+ while ((NumSkip < LoadNodes.size()) &&
+ (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
+ (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ NumSkip++;
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
+ NumConsecutiveStores -= NumSkip;
+ continue;
+ }
+
+ // Check that we can merge these candidates without causing a cycle.
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
+ RootNode)) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ continue;
+ }
+
+ // Find if it is better to use vectors or integers to load and store
+ // to memory.
+ EVT JointMemOpVT;
+ if (UseVectorTy) {
+ // Find a legal type for the vector store.
+ unsigned Elts = NumElem * NumMemElts;
+ JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+ } else {
+ unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+ JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
+ }
+
+ SDLoc LoadDL(LoadNodes[0].MemNode);
+ SDLoc StoreDL(StoreNodes[0].MemNode);
+
+ // The merged loads are required to have the same incoming chain, so
+ // using the first's chain is acceptable.
+
+ SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
+ AddToWorklist(NewStoreChain.getNode());
+
+ MachineMemOperand::Flags MMOFlags =
+ isDereferenceable ? MachineMemOperand::MODereferenceable
+ : MachineMemOperand::MONone;
+
+ SDValue NewLoad, NewStore;
+ if (UseVectorTy || !DoIntegerTruncate) {
+ NewLoad =
+ DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
+ FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
+ FirstLoadAlign, MMOFlags);
+ NewStore = DAG.getStore(
+ NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), FirstStoreAlign);
+ } else { // This must be the truncstore/extload case
+ EVT ExtendedTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
+ NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
+ FirstLoad->getChain(), FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), JointMemOpVT,
+ FirstLoadAlign, MMOFlags);
+ NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ JointMemOpVT, FirstInChain->getAlignment(),
+ FirstInChain->getMemOperand()->getFlags());
+ }
+
+ // Transfer chain users from old loads to the new load.
+ for (unsigned i = 0; i < NumElem; ++i) {
+ LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+ SDValue(NewLoad.getNode(), 1));
+ }
+
+      // Replace all of the stores with the new store. Recursively remove the
+      // corresponding stored value if it is no longer used.
+ for (unsigned i = 0; i < NumElem; ++i) {
+ SDValue Val = StoreNodes[i].MemNode->getOperand(1);
+ CombineTo(StoreNodes[i].MemNode, NewStore);
+ if (Val.getNode()->use_empty())
+ recursivelyDeleteUnusedNodes(Val.getNode());
+ }
+
+ RV = true;
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ }
}
return RV;
}
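On the load-source path a successful merge replaces N narrow load/store pairs with one wide pair, and the loop now continues on the remaining candidates instead of returning. An illustrative before/after, with types and offsets invented for the example:

    // before: four consecutive byte copies on matching chains and bases
    //   %v0 = load i8 [src+0]    store i8 %v0, [dst+0]
    //   %v1 = load i8 [src+1]    store i8 %v1, [dst+1]
    //   %v2 = load i8 [src+2]    store i8 %v2, [dst+2]
    //   %v3 = load i8 [src+3]    store i8 %v3, [dst+3]
    // after: one widened copy, when i32 is legal and fast on both sides
    //   %v  = load i32 [src]     store i32 %v, [dst]
    // Chain users of the dead narrow loads are rewired to the new load,
    // and each old store is combined into the new one, so memory ordering
    // is preserved.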
@@ -13713,13 +14507,14 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Try to infer better alignment information than the store already has.
if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
- if (Align > ST->getAlignment()) {
+ if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
SDValue NewStore =
DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
ST->getMemoryVT(), Align,
ST->getMemOperand()->getFlags(), ST->getAAInfo());
- if (NewStore.getNode() != N)
- return CombineTo(ST, NewStore, true);
+        // NewStore will always be N, as we are only refining the alignment.
+ assert(NewStore.getNode() == N);
+ (void)NewStore;
}
}
}
@@ -13783,30 +14578,30 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
}
- // Deal with elidable overlapping chained stores.
- if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain))
- if (OptLevel != CodeGenOpt::None && ST->isUnindexed() &&
- ST1->isUnindexed() && !ST1->isVolatile() && ST1->hasOneUse() &&
- !ST1->getBasePtr().isUndef() && !ST->isVolatile()) {
- BaseIndexOffset STBasePtr = BaseIndexOffset::match(ST->getBasePtr(), DAG);
- BaseIndexOffset ST1BasePtr =
- BaseIndexOffset::match(ST1->getBasePtr(), DAG);
- unsigned STBytes = ST->getMemoryVT().getStoreSize();
- unsigned ST1Bytes = ST1->getMemoryVT().getStoreSize();
- int64_t PtrDiff;
- // If this is a store who's preceeding store to a subset of the same
- // memory and no one other node is chained to that store we can
- // effectively drop the store. Do not remove stores to undef as they may
- // be used as data sinks.
-
- if (((ST->getBasePtr() == ST1->getBasePtr()) &&
- (ST->getValue() == ST1->getValue())) ||
- (STBasePtr.equalBaseIndex(ST1BasePtr, DAG, PtrDiff) &&
- (0 <= PtrDiff) && (PtrDiff + ST1Bytes <= STBytes))) {
+ if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
+ if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
+ !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
+ ST->getMemoryVT() == ST1->getMemoryVT()) {
+ // If this is a store followed by a store with the same value to the same
+ // location, then the store is dead/noop.
+ if (ST1->getValue() == Value) {
+ // The store is dead, remove it.
+ return Chain;
+ }
+
+      // If the preceding store on the chain writes to the same location,
+      // and no other node is chained to that store, we can effectively
+      // drop it. Do not remove stores to undef as they may be used as
+      // data sinks.
+ if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
+ !ST1->getBasePtr().isUndef()) {
+        // ST1 is fully overwritten and can be elided. Combine with its chain
+        // value.
CombineTo(ST1, ST1->getChain());
- return SDValue(N, 0);
+ return SDValue();
}
}
+ }
// If this is an FP_ROUND or TRUNC followed by a store, fold this into a
// truncating store. We can do this even if this is already a truncstore.
@@ -14201,6 +14996,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue EltNo = N->getOperand(1);
ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ // extract_vector_elt of out-of-bounds element -> UNDEF
+ if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements()))
+ return DAG.getUNDEF(NVT);
+
// extract_vector_elt (build_vector x, y), 1 -> y
if (ConstEltNo &&
InVec.getOpcode() == ISD::BUILD_VECTOR &&
@@ -14286,6 +15085,23 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
}
}
+ // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
+ // simplify it based on the (valid) extraction indices.
+ if (llvm::all_of(InVec->uses(), [&](SDNode *Use) {
+ return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Use->getOperand(0) == InVec &&
+ isa<ConstantSDNode>(Use->getOperand(1));
+ })) {
+ APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements());
+ for (SDNode *Use : InVec->uses()) {
+ auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
+ if (CstElt->getAPIntValue().ult(VT.getVectorNumElements()))
+ DemandedElts.setBit(CstElt->getZExtValue());
+ }
+ if (SimplifyDemandedVectorElts(InVec, DemandedElts, true))
+ return SDValue(N, 0);
+ }
+
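When every user of the source vector is an EXTRACT_VECTOR_ELT with an in-range constant index, the union of those indices is exactly the set of lanes that matter, and SimplifyDemandedVectorElts can turn the remaining lanes into undef upstream. A small concrete case of the mask being built (lane numbers invented):

    // Source is v4i32 and is used only by extracts of lanes 0 and 2.
    APInt DemandedElts = APInt::getNullValue(4);
    DemandedElts.setBit(0); // extractelt %v, 0
    DemandedElts.setBit(2); // extractelt %v, 2
    // DemandedElts == 0b0101: whatever computes lanes 1 and 3 of the
    // source may now be simplified away or replaced with undef.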
bool BCNumEltsChanged = false;
EVT ExtVT = VT.getVectorElementType();
EVT LVT = ExtVT;
@@ -14492,7 +15308,10 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
"Invalid vector size");
// Check if the new vector type is legal.
- if (!isTypeLegal(VecVT)) return SDValue();
+ if (!isTypeLegal(VecVT) ||
+ (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
+ TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
+ return SDValue();
// Make the new BUILD_VECTOR.
SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
@@ -14739,12 +15558,16 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
}
// Not an undef or zero. If the input is something other than an
- // EXTRACT_VECTOR_ELT with a constant index, bail out.
+ // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(Op.getOperand(1)))
return SDValue();
SDValue ExtractedFromVec = Op.getOperand(0);
+ APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
+ if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
+ return SDValue();
+
// All inputs must have the same element type as the output.
if (VT.getVectorElementType() !=
ExtractedFromVec.getValueType().getVectorElementType())
@@ -14900,6 +15723,54 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
return Shuffles[0];
}
+// Try to turn a build vector of zero extends of extract vector elts into a
+// vector zero extend and possibly an extract subvector.
+// TODO: Support sign extend or any extend?
+// TODO: Allow undef elements?
+// TODO: Don't require the extracts to start at element 0.
+SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
+ if (LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ SDValue Op0 = N->getOperand(0);
+ auto checkElem = [&](SDValue Op) -> int64_t {
+ if (Op.getOpcode() == ISD::ZERO_EXTEND &&
+ Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
+ if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
+ return C->getZExtValue();
+ return -1;
+ };
+
+ // Make sure the first element matches
+ // (zext (extract_vector_elt X, C))
+ int64_t Offset = checkElem(Op0);
+ if (Offset < 0)
+ return SDValue();
+
+ unsigned NumElems = N->getNumOperands();
+ SDValue In = Op0.getOperand(0).getOperand(0);
+ EVT InSVT = In.getValueType().getScalarType();
+ EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
+
+ // Don't create an illegal input type after type legalization.
+ if (LegalTypes && !TLI.isTypeLegal(InVT))
+ return SDValue();
+
+ // Ensure all the elements come from the same vector and are adjacent.
+ for (unsigned i = 1; i != NumElems; ++i) {
+ if ((Offset + i) != checkElem(N->getOperand(i)))
+ return SDValue();
+ }
+
+ SDLoc DL(N);
+ In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
+ Op0.getOperand(0).getOperand(1));
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
+}
+
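The shape convertBuildVecZextToZext recognizes, with types invented for illustration:

    //   (v4i32 build_vector (zext (extractelt v8i16 %X, C+0)),
    //                       (zext (extractelt v8i16 %X, C+1)),
    //                       (zext (extractelt v8i16 %X, C+2)),
    //                       (zext (extractelt v8i16 %X, C+3)))
    // becomes
    //   (v4i32 zext (v4i16 extract_subvector %X, C))
    // where C is the first element's extract index and all elements come
    // from the same source vector at adjacent offsets.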
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
@@ -14907,6 +15778,32 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(VT);
+ // If this is a splat of a bitcast from another vector, change to a
+ // concat_vector.
+ // For example:
+ // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
+ // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
+ //
+ // If X is a build_vector itself, the concat can become a larger build_vector.
+ // TODO: Maybe this is useful for non-splat too?
+ if (!LegalOperations) {
+ if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
+ Splat = peekThroughBitcast(Splat);
+ EVT SrcVT = Splat.getValueType();
+ if (SrcVT.isVector()) {
+ unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
+ SrcVT.getVectorElementType(), NumElts);
+ if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
+ SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
+ NewVT, Ops);
+ return DAG.getBitcast(VT, Concat);
+ }
+ }
+ }
+ }
+
// Check if we can express BUILD VECTOR via subvector extract.
if (!LegalTypes && (N->getNumOperands() > 1)) {
SDValue Op0 = N->getOperand(0);
@@ -14936,6 +15833,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
Op0.getOperand(0), Op0.getOperand(1));
}
+ if (SDValue V = convertBuildVecZextToZext(N))
+ return V;
+
if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
return V;
@@ -15125,6 +16025,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
return SDValue();
+ // Bail out if the vector size is not a multiple of the scalar size.
+ if (VT.getSizeInBits() % SclTy.getSizeInBits())
+ return SDValue();
+
unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
if (VNTNumElms < 2)
return SDValue();
@@ -15403,13 +16307,22 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// Only do this if we won't split any elements.
if (ExtractSize % EltSize == 0) {
unsigned NumElems = ExtractSize / EltSize;
- EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(),
- InVT.getVectorElementType(), NumElems);
- if ((!LegalOperations ||
- TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT)) &&
+ EVT EltVT = InVT.getVectorElementType();
+ EVT ExtractVT = NumElems == 1 ? EltVT :
+ EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
+ if ((Level < AfterLegalizeDAG ||
+ (NumElems == 1 ||
+ TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
(!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
EltSize;
+ if (NumElems == 1) {
+ SDValue Src = V->getOperand(IdxVal);
+ if (EltVT != Src.getValueType())
+              Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
+
+ return DAG.getBitcast(NVT, Src);
+ }
// Extract the pieces from the original build_vector.
SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
@@ -15451,122 +16364,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
return NarrowBOp;
- return SDValue();
-}
-
-static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
- SDValue V, SelectionDAG &DAG) {
- SDLoc DL(V);
- EVT VT = V.getValueType();
-
- switch (V.getOpcode()) {
- default:
- return V;
-
- case ISD::CONCAT_VECTORS: {
- EVT OpVT = V->getOperand(0).getValueType();
- int OpSize = OpVT.getVectorNumElements();
- SmallBitVector OpUsedElements(OpSize, false);
- bool FoundSimplification = false;
- SmallVector<SDValue, 4> NewOps;
- NewOps.reserve(V->getNumOperands());
- for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
- SDValue Op = V->getOperand(i);
- bool OpUsed = false;
- for (int j = 0; j < OpSize; ++j)
- if (UsedElements[i * OpSize + j]) {
- OpUsedElements[j] = true;
- OpUsed = true;
- }
- NewOps.push_back(
- OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
- : DAG.getUNDEF(OpVT));
- FoundSimplification |= Op == NewOps.back();
- OpUsedElements.reset();
- }
- if (FoundSimplification)
- V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
- return V;
- }
-
- case ISD::INSERT_SUBVECTOR: {
- SDValue BaseV = V->getOperand(0);
- SDValue SubV = V->getOperand(1);
- auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
- if (!IdxN)
- return V;
-
- int SubSize = SubV.getValueType().getVectorNumElements();
- int Idx = IdxN->getZExtValue();
- bool SubVectorUsed = false;
- SmallBitVector SubUsedElements(SubSize, false);
- for (int i = 0; i < SubSize; ++i)
- if (UsedElements[i + Idx]) {
- SubVectorUsed = true;
- SubUsedElements[i] = true;
- UsedElements[i + Idx] = false;
- }
-
- // Now recurse on both the base and sub vectors.
- SDValue SimplifiedSubV =
- SubVectorUsed
- ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
- : DAG.getUNDEF(SubV.getValueType());
- SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
- if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
- V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
- SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
- return V;
- }
- }
-}
-
-static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
- SDValue N1, SelectionDAG &DAG) {
- EVT VT = SVN->getValueType(0);
- int NumElts = VT.getVectorNumElements();
- SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
- for (int M : SVN->getMask())
- if (M >= 0 && M < NumElts)
- N0UsedElements[M] = true;
- else if (M >= NumElts)
- N1UsedElements[M - NumElts] = true;
-
- SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
- SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
- if (S0 == N0 && S1 == N1)
- return SDValue();
-
- return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
-}
-
-static SDValue simplifyShuffleMask(ShuffleVectorSDNode *SVN, SDValue N0,
- SDValue N1, SelectionDAG &DAG) {
- auto isUndefElt = [](SDValue V, int Idx) {
- // TODO - handle more cases as required.
- if (V.getOpcode() == ISD::BUILD_VECTOR)
- return V.getOperand(Idx).isUndef();
- if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
- return (Idx != 0) || V.getOperand(0).isUndef();
- return false;
- };
-
- EVT VT = SVN->getValueType(0);
- unsigned NumElts = VT.getVectorNumElements();
-
- bool Changed = false;
- SmallVector<int, 8> NewMask;
- for (unsigned i = 0; i != NumElts; ++i) {
- int Idx = SVN->getMaskElt(i);
- if ((0 <= Idx && Idx < (int)NumElts && isUndefElt(N0, Idx)) ||
- ((int)NumElts < Idx && isUndefElt(N1, Idx - NumElts))) {
- Changed = true;
- Idx = -1;
- }
- NewMask.push_back(Idx);
- }
- if (Changed)
- return DAG.getVectorShuffle(VT, SDLoc(SVN), N0, N1, NewMask);
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
return SDValue();
}
@@ -16013,10 +16812,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
}
- // Simplify shuffle mask if a referenced element is UNDEF.
- if (SDValue V = simplifyShuffleMask(SVN, N0, N1, DAG))
- return V;
-
if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
return InsElt;
@@ -16077,11 +16872,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
}
- // There are various patterns used to build up a vector from smaller vectors,
- // subvectors, or elements. Scan chains of these and replace unused insertions
- // or components with undef.
- if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
- return S;
+ // Simplify source operands based on shuffle mask.
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
// Match shuffles that can be converted to any_vector_extend_in_reg.
if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
@@ -16394,7 +17187,9 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N1.getOperand(0).getOperand(1) == N2 &&
N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
- VT.getVectorNumElements()) {
+ VT.getVectorNumElements() &&
+ N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
+ VT.getSizeInBits()) {
return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
}
@@ -16405,10 +17200,11 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
SDValue CN0 = N0.getOperand(0);
SDValue CN1 = N1.getOperand(0);
- if (CN0.getValueType().getVectorElementType() ==
- CN1.getValueType().getVectorElementType() &&
- CN0.getValueType().getVectorNumElements() ==
- VT.getVectorNumElements()) {
+ EVT CN0VT = CN0.getValueType();
+ EVT CN1VT = CN1.getValueType();
+ if (CN0VT.isVector() && CN1VT.isVector() &&
+ CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
+ CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
CN0.getValueType(), CN0, CN1, N2);
return DAG.getBitcast(VT, NewINSERT);
@@ -16663,14 +17459,14 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
const ConstantFPSDNode *Zero = nullptr;
if (TheSelect->getOpcode() == ISD::SELECT_CC) {
- CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
+ CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
CmpLHS = TheSelect->getOperand(0);
Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
} else {
// SELECT or VSELECT
SDValue Cmp = TheSelect->getOperand(0);
if (Cmp.getOpcode() == ISD::SETCC) {
- CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
+ CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
CmpLHS = Cmp.getOperand(0);
Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
}
@@ -16888,24 +17684,6 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
return !SCCC->isNullValue() ? N2 : N3;
}
- // Check to see if we can simplify the select into an fabs node
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
- // Allow either -0.0 or 0.0
- if (CFP->isZero()) {
- // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
- if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
- N0 == N2 && N3.getOpcode() == ISD::FNEG &&
- N2 == N3.getOperand(0))
- return DAG.getNode(ISD::FABS, DL, VT, N0);
-
- // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
- if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
- N0 == N3 && N2.getOpcode() == ISD::FNEG &&
- N2.getOperand(0) == N3)
- return DAG.getNode(ISD::FABS, DL, VT, N3);
- }
- }
-
// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
// in it. This is a win when the constant is not otherwise available because
@@ -17383,19 +18161,34 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
: buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
if (!Reciprocal) {
- // Unfortunately, Est is now NaN if the input was exactly 0.0.
- // Select out this case and force the answer to 0.0.
+ // The estimate is now completely wrong if the input was exactly 0.0 or
+ // possibly a denormal. Force the answer to 0.0 for those cases.
EVT VT = Op.getValueType();
SDLoc DL(Op);
-
- SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
EVT CCVT = getSetCCResultType(VT);
- SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
- AddToWorklist(ZeroCmp.getNode());
-
- Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
- ZeroCmp, FPZero, Est);
- AddToWorklist(Est.getNode());
+ ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
+ const Function &F = DAG.getMachineFunction().getFunction();
+ Attribute Denorms = F.getFnAttribute("denormal-fp-math");
+ if (Denorms.getValueAsString().equals("ieee")) {
+ // fabs(X) < SmallestNormal ? 0.0 : Est
+ const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
+ APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
+ SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
+ SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
+ SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
+ SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
+ Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
+ AddToWorklist(Fabs.getNode());
+ AddToWorklist(IsDenorm.getNode());
+ AddToWorklist(Est.getNode());
+ } else {
+ // X == 0.0 ? 0.0 : Est
+ SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
+ SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
+ Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
+ AddToWorklist(IsZero.getNode());
+ AddToWorklist(Est.getNode());
+ }
}
}
return Est;
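Written as a selection, the new guard is, in effect (pseudocode; SmallestNormal is the minimum positive normalized value of the type):

    // "denormal-fp-math"="ieee": denormal inputs can also yield a wildly
    // wrong estimate, so test against the whole subnormal range:
    //   Est'(x) = (fabs(x) < SmallestNormal) ? 0.0 : Est(x)
    // otherwise only exact zero needs fixing:
    //   Est'(x) = (x == 0.0) ? 0.0 : Est(x)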
@@ -17433,44 +18226,46 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
// Check for BaseIndexOffset matching.
- BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
- BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
+ BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
+ BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
int64_t PtrDiff;
- if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
- return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
-
- // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
- // able to calculate their relative offset if at least one arises
- // from an alloca. However, these allocas cannot overlap and we
- // can infer there is no alias.
- if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
- if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- // If the base are the same frame index but the we couldn't find a
- // constant offset, (indices are different) be conservative.
- if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
- !MFI.isFixedObjectIndex(B->getIndex())))
- return false;
- }
-
- bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
- bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
- bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
- bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
- bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
- bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
+ if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
+ if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
+ return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
+
+ // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
+ // able to calculate their relative offset if at least one arises
+ // from an alloca. However, these allocas cannot overlap and we
+ // can infer there is no alias.
+ if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
+ if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+          // If the bases are the same frame index but we couldn't find a
+          // constant offset (the indices are different), be conservative.
+ if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
+ !MFI.isFixedObjectIndex(B->getIndex())))
+ return false;
+ }
- // If of mismatched base types or checkable indices we can check
- // they do not alias.
- if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
- (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
- (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
- return false;
+ bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
+ bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
+ bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
+ bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
+ bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
+ bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
+
+    // If the base types are mismatched, or the indices are checkable,
+    // we can prove that they do not alias.
+ if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
+ (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
+ (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
+ return false;
+ }
- // If we know required SrcValue1 and SrcValue2 have relatively large alignment
- // compared to the size and offset of the access, we may be able to prove they
- // do not alias. This check is conservative for now to catch cases created by
- // splitting vector types.
+  // If we know that SrcValue1 and SrcValue2 have relatively large alignment
+  // compared to the size and offset of the accesses, we may be able to prove
+  // that they do not alias. This check is conservative for now to catch cases
+  // created by splitting vector types.
int64_t SrcValOffset0 = Op0->getSrcValueOffset();
int64_t SrcValOffset1 = Op1->getSrcValueOffset();
unsigned OrigAlignment0 = Op0->getOriginalAlignment();
@@ -17480,8 +18275,8 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
- // There is no overlap between these relatively aligned accesses of similar
- // size. Return no alias.
+ // There is no overlap between these relatively aligned accesses of
+ // similar size. Return no alias.
if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
(OffAlign1 + NumBytes1) <= OffAlign0)
return false;
@@ -17644,7 +18439,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
// We must have a base and an offset.
if (!BasePtr.getBase().getNode())
@@ -17670,7 +18465,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
break;
// Find the base pointer and offset for this memory node.
- BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);
// Check that the base pointer is the same as the original one.
if (!BasePtr.equalBaseIndex(Ptr, DAG))
@@ -17696,7 +18491,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
Index = nullptr;
break;
}
- } // end while
+ }// end while
}
// At this point, ChainedStores lists all of the Store nodes
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index d3c94b5f9e6b..e4a9d557d386 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -61,7 +61,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -99,6 +98,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -113,6 +113,11 @@ using namespace llvm;
#define DEBUG_TYPE "isel"
+// FIXME: Remove this after the feature has proven reliable.
+static cl::opt<bool> SinkLocalValues("fast-isel-sink-local-values",
+ cl::init(true), cl::Hidden,
+ cl::desc("Sink local values in FastISel"));
+
STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
"target-independent selector");
STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
@@ -120,9 +125,10 @@ STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");
/// Set the current block to which generated machine instructions will be
-/// appended, and clear the local CSE map.
+/// appended.
void FastISel::startNewBlock() {
- LocalValueMap.clear();
+ assert(LocalValueMap.empty() &&
+ "local values should be cleared after finishing a BB");
// Instructions are appended to FuncInfo.MBB. If the basic block already
// contains labels or copies, use the last instruction as the last local
@@ -133,6 +139,9 @@ void FastISel::startNewBlock() {
LastLocalValue = EmitStartPt;
}
+/// Flush the local CSE map and sink anything we can.
+void FastISel::finishBasicBlock() { flushLocalValueMap(); }
+
bool FastISel::lowerArguments() {
if (!FuncInfo.CanLowerReturn)
// Fallback to SDISel argument lowering code to deal with sret pointer
@@ -153,11 +162,168 @@ bool FastISel::lowerArguments() {
return true;
}
+/// Return the defined register if this instruction defines exactly one
+/// virtual register and uses no other virtual registers. Otherwise return 0.
+static unsigned findSinkableLocalRegDef(MachineInstr &MI) {
+ unsigned RegDef = 0;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef()) {
+ if (RegDef)
+ return 0;
+ RegDef = MO.getReg();
+ } else if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ // This is another use of a vreg. Don't try to sink it.
+ return 0;
+ }
+ }
+ return RegDef;
+}
+
void FastISel::flushLocalValueMap() {
+ // Try to sink local values down to their first use so that we can give them a
+ // better debug location. This has the side effect of shrinking local value
+ // live ranges, which helps out fast regalloc.
+ if (SinkLocalValues && LastLocalValue != EmitStartPt) {
+ // Sink local value materialization instructions between EmitStartPt and
+ // LastLocalValue. Visit them bottom-up, starting from LastLocalValue, to
+ // avoid inserting into the range that we're iterating over.
+ MachineBasicBlock::reverse_iterator RE =
+ EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt)
+ : FuncInfo.MBB->rend();
+ MachineBasicBlock::reverse_iterator RI(LastLocalValue);
+
+ InstOrderMap OrderMap;
+ for (; RI != RE;) {
+ MachineInstr &LocalMI = *RI;
+ ++RI;
+ bool Store = true;
+ if (!LocalMI.isSafeToMove(nullptr, Store))
+ continue;
+ unsigned DefReg = findSinkableLocalRegDef(LocalMI);
+ if (DefReg == 0)
+ continue;
+
+ sinkLocalValueMaterialization(LocalMI, DefReg, OrderMap);
+ }
+ }
+
LocalValueMap.clear();
LastLocalValue = EmitStartPt;
recomputeInsertPt();
SavedInsertPt = FuncInfo.InsertPt;
+ LastFlushPoint = FuncInfo.InsertPt;
+}
+
+static bool isRegUsedByPhiNodes(unsigned DefReg,
+ FunctionLoweringInfo &FuncInfo) {
+ for (auto &P : FuncInfo.PHINodesToUpdate)
+ if (P.second == DefReg)
+ return true;
+ return false;
+}
+
+/// Build a map of instruction orders. Record the first terminator and its
+/// order. Consider EH_LABEL instructions to be terminators as well, since local
+/// values for phis after invokes must be materialized before the call.
+void FastISel::InstOrderMap::initialize(
+ MachineBasicBlock *MBB, MachineBasicBlock::iterator LastFlushPoint) {
+ unsigned Order = 0;
+ for (MachineInstr &I : *MBB) {
+ if (!FirstTerminator &&
+ (I.isTerminator() || (I.isEHLabel() && &I != &MBB->front()))) {
+ FirstTerminator = &I;
+ FirstTerminatorOrder = Order;
+ }
+ Orders[&I] = Order++;
+
+ // We don't need to order instructions past the last flush point.
+ if (I.getIterator() == LastFlushPoint)
+ break;
+ }
+}
+
+void FastISel::sinkLocalValueMaterialization(MachineInstr &LocalMI,
+ unsigned DefReg,
+ InstOrderMap &OrderMap) {
+ // If this register is used by a register fixup, MRI will not contain all
+ // the uses until after register fixups, so don't attempt to sink or DCE
+ // this instruction. Register fixups typically come from no-op cast
+ // instructions, which replace the cast instruction vreg with the local
+ // value vreg.
+ if (FuncInfo.RegsWithFixups.count(DefReg))
+ return;
+
+  // We can DCE this instruction if there are no uses and it wasn't
+  // materialized for a successor PHI node.
+ bool UsedByPHI = isRegUsedByPhiNodes(DefReg, FuncInfo);
+ if (!UsedByPHI && MRI.use_nodbg_empty(DefReg)) {
+ if (EmitStartPt == &LocalMI)
+ EmitStartPt = EmitStartPt->getPrevNode();
+ LLVM_DEBUG(dbgs() << "removing dead local value materialization "
+ << LocalMI);
+ OrderMap.Orders.erase(&LocalMI);
+ LocalMI.eraseFromParent();
+ return;
+ }
+
+  // Number the instructions if we haven't yet, so we can efficiently find
+  // the earliest use.
+ if (OrderMap.Orders.empty())
+ OrderMap.initialize(FuncInfo.MBB, LastFlushPoint);
+
+ // Find the first user in the BB.
+ MachineInstr *FirstUser = nullptr;
+ unsigned FirstOrder = std::numeric_limits<unsigned>::max();
+ for (MachineInstr &UseInst : MRI.use_nodbg_instructions(DefReg)) {
+ auto I = OrderMap.Orders.find(&UseInst);
+ assert(I != OrderMap.Orders.end() &&
+ "local value used by instruction outside local region");
+ unsigned UseOrder = I->second;
+ if (UseOrder < FirstOrder) {
+ FirstOrder = UseOrder;
+ FirstUser = &UseInst;
+ }
+ }
+
+ // The insertion point will be the first terminator or the first user,
+ // whichever came first. If there was no terminator, this must be a
+ // fallthrough block and the insertion point is the end of the block.
+ MachineBasicBlock::instr_iterator SinkPos;
+ if (UsedByPHI && OrderMap.FirstTerminatorOrder < FirstOrder) {
+ FirstOrder = OrderMap.FirstTerminatorOrder;
+ SinkPos = OrderMap.FirstTerminator->getIterator();
+ } else if (FirstUser) {
+ SinkPos = FirstUser->getIterator();
+ } else {
+      assert(UsedByPHI && "must have users if not used by a phi");
+ SinkPos = FuncInfo.MBB->instr_end();
+ }
+
+ // Collect all DBG_VALUEs before the new insertion position so that we can
+ // sink them.
+ SmallVector<MachineInstr *, 1> DbgValues;
+ for (MachineInstr &DbgVal : MRI.use_instructions(DefReg)) {
+ if (!DbgVal.isDebugValue())
+ continue;
+ unsigned UseOrder = OrderMap.Orders[&DbgVal];
+ if (UseOrder < FirstOrder)
+ DbgValues.push_back(&DbgVal);
+ }
+
+ // Sink LocalMI before SinkPos and assign it the same DebugLoc.
+ LLVM_DEBUG(dbgs() << "sinking local value to first use " << LocalMI);
+ FuncInfo.MBB->remove(&LocalMI);
+ FuncInfo.MBB->insert(SinkPos, &LocalMI);
+ if (SinkPos != FuncInfo.MBB->end())
+ LocalMI.setDebugLoc(SinkPos->getDebugLoc());
+
+ // Sink any debug values that we've collected.
+ for (MachineInstr *DI : DbgValues) {
+ FuncInfo.MBB->remove(DI);
+ FuncInfo.MBB->insert(SinkPos, DI);
+ }
}
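
The earliest-use search above reduces to numbering the block's instructions once and taking the minimum order over all users of the def. A minimal standalone sketch of that pattern, with hypothetical names (Inst stands in for MachineInstr; this is not code from the patch):

#include <algorithm>
#include <climits>
#include <unordered_map>
#include <vector>

struct Inst {};

// Number instructions once per block; the sink destination for a def is
// simply its lowest-numbered user.
unsigned earliestUseOrder(const std::vector<Inst *> &Users,
                          const std::unordered_map<Inst *, unsigned> &Order) {
  unsigned Best = UINT_MAX;
  for (Inst *U : Users)
    Best = std::min(Best, Order.at(U));
  return Best;
}
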
bool FastISel::hasTrivialKill(const Value *V) {
@@ -328,8 +494,10 @@ void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
AssignedReg = Reg;
else if (Reg != AssignedReg) {
// Arrange for uses of AssignedReg to be replaced by uses of Reg.
- for (unsigned i = 0; i < NumRegs; i++)
+ for (unsigned i = 0; i < NumRegs; i++) {
FuncInfo.RegFixups[AssignedReg + i] = Reg + i;
+ FuncInfo.RegsWithFixups.insert(Reg + i);
+ }
AssignedReg = Reg;
}
@@ -681,7 +849,7 @@ bool FastISel::selectStackmap(const CallInst *I) {
return true;
}
-/// \brief Lower an argument list according to the target calling convention.
+/// Lower an argument list according to the target calling convention.
///
/// This is a helper for lowering intrinsics that follow a target calling
/// convention or require stack pointer adjustment. Only a subset of the
@@ -702,7 +870,7 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
ArgListEntry Entry;
Entry.Val = V;
Entry.Ty = V->getType();
- Entry.setAttributes(&CS, ArgIdx);
+ Entry.setAttributes(&CS, ArgI);
Args.push_back(Entry);
}
@@ -874,10 +1042,31 @@ bool FastISel::selectXRayCustomEvent(const CallInst *I) {
TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
for (auto &MO : Ops)
MIB.add(MO);
+
// Insert the Patchable Event Call instruction, that gets lowered properly.
return true;
}
+bool FastISel::selectXRayTypedEvent(const CallInst *I) {
+ const auto &Triple = TM.getTargetTriple();
+ if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+ return true; // don't do anything to this instruction.
+ SmallVector<MachineOperand, 8> Ops;
+ Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
+ /*IsDef=*/false));
+ Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
+ /*IsDef=*/false));
+ Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)),
+ /*IsDef=*/false));
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL));
+ for (auto &MO : Ops)
+ MIB.add(MO);
+
+  // Insert the Patchable Typed Event Call instruction, which gets lowered
+  // properly.
+ return true;
+}
/// Returns an AttributeList representing the attributes applied to the return
/// value of the given call.
@@ -1141,13 +1330,13 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
assert(DI->getVariable() && "Missing variable");
if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
- DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
}
const Value *Address = DI->getAddress();
if (!Address || isa<UndefValue>(Address)) {
- DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
}
@@ -1182,24 +1371,15 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
if (Op) {
assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
"Expected inlined-at fields to agree");
- if (Op->isReg()) {
- Op->setIsDebug(true);
- // A dbg.declare describes the address of a source variable, so lower it
- // into an indirect DBG_VALUE.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true,
- Op->getReg(), DI->getVariable(), DI->getExpression());
- } else
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::DBG_VALUE))
- .add(*Op)
- .addImm(0)
- .addMetadata(DI->getVariable())
- .addMetadata(DI->getExpression());
+ // A dbg.declare describes the address of a source variable, so lower it
+ // into an indirect DBG_VALUE.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true,
+ *Op, DI->getVariable(), DI->getExpression());
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
- DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
}
return true;
}
@@ -1242,7 +1422,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
- DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
}
return true;
}
@@ -1256,7 +1436,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
updateValueMap(II, ResultReg);
return true;
}
- case Intrinsic::invariant_group_barrier:
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group:
case Intrinsic::expect: {
unsigned ResultReg = getRegForValue(II->getArgOperand(0));
if (!ResultReg)
@@ -1272,6 +1453,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
case Intrinsic::xray_customevent:
return selectXRayCustomEvent(II);
+ case Intrinsic::xray_typedevent:
+ return selectXRayTypedEvent(II);
}
return fastLowerIntrinsicCall(II);
@@ -2051,11 +2234,9 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// At this point we know that there is a 1-1 correspondence between LLVM PHI
// nodes and Machine PHI nodes, but the incoming operands have not been
// emitted yet.
- for (BasicBlock::const_iterator I = SuccBB->begin();
- const auto *PN = dyn_cast<PHINode>(I); ++I) {
-
+ for (const PHINode &PN : SuccBB->phis()) {
// Ignore dead phi's.
- if (PN->use_empty())
+ if (PN.use_empty())
continue;
// Only handle legal types. Two interesting things to note here. First,
@@ -2064,7 +2245,7 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// own moves. Second, this check is necessary because FastISel doesn't
// use CreateRegs to create registers, so it always creates
// exactly one register for each non-void instruction.
- EVT VT = TLI.getValueType(DL, PN->getType(), /*AllowUnknown=*/true);
+ EVT VT = TLI.getValueType(DL, PN.getType(), /*AllowUnknown=*/true);
if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
// Handle integer promotions, though, because they're common and easy.
if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) {
@@ -2073,11 +2254,11 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
}
}
- const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+ const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
// Set the DebugLoc for the copy. Prefer the location of the operand
// if there is one; use the location of the PHI otherwise.
- DbgLoc = PN->getDebugLoc();
+ DbgLoc = PN.getDebugLoc();
if (const auto *Inst = dyn_cast<Instruction>(PHIOp))
DbgLoc = Inst->getDebugLoc();
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index c7cdb49203b1..42c7181dac41 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -118,6 +119,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
}
}
}
+ if (Personality == EHPersonality::Wasm_CXX) {
+ WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo();
+ calculateWasmEHInfo(&fn, EHInfo);
+ }
// Initialize the mapping of values to registers. This is only set up for
// instruction values that are used outside of the block that defines
@@ -226,9 +231,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
const Instruction *PadInst = BB.getFirstNonPHI();
// If this is a non-landingpad EH pad, mark this function as using
// funclets.
- // FIXME: SEH catchpads do not create funclets, so we could avoid setting
- // this in such cases in order to improve frame layout.
+    // FIXME: SEH catchpads do not create EH scopes/funclets, so we could avoid
+ // setting this in such cases in order to improve frame layout.
if (!isa<LandingPadInst>(PadInst)) {
+ MF->setHasEHScopes(true);
MF->setHasEHFunclets(true);
MF->getFrameInfo().setHasOpaqueSPAdjustment(true);
}
@@ -257,20 +263,20 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Create Machine PHI nodes for LLVM PHI nodes, lowering them as
// appropriate.
- for (BasicBlock::const_iterator I = BB.begin();
- const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- if (PN->use_empty()) continue;
+ for (const PHINode &PN : BB.phis()) {
+ if (PN.use_empty())
+ continue;
// Skip empty types
- if (PN->getType()->isEmptyTy())
+ if (PN.getType()->isEmptyTy())
continue;
- DebugLoc DL = PN->getDebugLoc();
- unsigned PHIReg = ValueMap[PN];
+ DebugLoc DL = PN.getDebugLoc();
+ unsigned PHIReg = ValueMap[&PN];
assert(PHIReg && "PHI node does not have an assigned virtual register!");
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TLI, MF->getDataLayout(), PN->getType(), ValueVTs);
+ ComputeValueVTs(*TLI, MF->getDataLayout(), PN.getType(), ValueVTs);
for (EVT VT : ValueVTs) {
unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
@@ -281,28 +287,46 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
}
}
- if (!isFuncletEHPersonality(Personality))
- return;
-
- WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo();
+ if (isFuncletEHPersonality(Personality)) {
+ WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo();
- // Map all BB references in the WinEH data to MBBs.
- for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
- for (WinEHHandlerType &H : TBME.HandlerArray) {
- if (H.Handler)
- H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()];
+ // Map all BB references in the WinEH data to MBBs.
+ for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
+ for (WinEHHandlerType &H : TBME.HandlerArray) {
+ if (H.Handler)
+ H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()];
+ }
+ }
+ for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap)
+ if (UME.Cleanup)
+ UME.Cleanup = MBBMap[UME.Cleanup.get<const BasicBlock *>()];
+ for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) {
+ const auto *BB = UME.Handler.get<const BasicBlock *>();
+ UME.Handler = MBBMap[BB];
+ }
+ for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) {
+ const auto *BB = CME.Handler.get<const BasicBlock *>();
+ CME.Handler = MBBMap[BB];
}
}
- for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap)
- if (UME.Cleanup)
- UME.Cleanup = MBBMap[UME.Cleanup.get<const BasicBlock *>()];
- for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) {
- const BasicBlock *BB = UME.Handler.get<const BasicBlock *>();
- UME.Handler = MBBMap[BB];
- }
- for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) {
- const BasicBlock *BB = CME.Handler.get<const BasicBlock *>();
- CME.Handler = MBBMap[BB];
+
+ else if (Personality == EHPersonality::Wasm_CXX) {
+ WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo();
+    // Map all BB references in the WasmEH data to MBBs.
+ DenseMap<BBOrMBB, BBOrMBB> NewMap;
+ for (auto &KV : EHInfo.EHPadUnwindMap) {
+ const auto *Src = KV.first.get<const BasicBlock *>();
+ const auto *Dst = KV.second.get<const BasicBlock *>();
+ NewMap[MBBMap[Src]] = MBBMap[Dst];
+ }
+ EHInfo.EHPadUnwindMap = std::move(NewMap);
+ NewMap.clear();
+ for (auto &KV : EHInfo.ThrowUnwindMap) {
+ const auto *Src = KV.first.get<const BasicBlock *>();
+ const auto *Dst = KV.second.get<const BasicBlock *>();
+ NewMap[MBBMap[Src]] = MBBMap[Dst];
+ }
+ EHInfo.ThrowUnwindMap = std::move(NewMap);
}
}
@@ -312,12 +336,14 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
void FunctionLoweringInfo::clear() {
MBBMap.clear();
ValueMap.clear();
+ VirtReg2Value.clear();
StaticAllocaMap.clear();
LiveOutRegInfo.clear();
VisitedBBs.clear();
ArgDbgValues.clear();
ByValArgFrameIndexMap.clear();
RegFixups.clear();
+ RegsWithFixups.clear();
StatepointStackSlots.clear();
StatepointSpillMaps.clear();
PreferredExtendType.clear();
@@ -483,7 +509,7 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
auto I = ByValArgFrameIndexMap.find(A);
if (I != ByValArgFrameIndexMap.end())
return I->second;
- DEBUG(dbgs() << "Argument does not have assigned frame index!\n");
+ LLVM_DEBUG(dbgs() << "Argument does not have assigned frame index!\n");
return INT_MAX;
}
@@ -547,3 +573,13 @@ FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const
}
return std::make_pair(It->second, false);
}
+
+const Value *
+FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) {
+ if (VirtReg2Value.empty()) {
+ for (auto &P : ValueMap) {
+ VirtReg2Value[P.second] = P.first;
+ }
+ }
+ return VirtReg2Value[Vreg];
+}
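
The new getValueFromVirtualReg builds the inverse of ValueMap lazily on the first query and answers later lookups from the cache (VirtReg2Value is reset alongside the other maps in clear()). A minimal sketch of the lazy-inverse idiom, with hypothetical names and int standing in for Value*:

#include <unordered_map>

struct TwoWayMap {
  std::unordered_map<int, unsigned> Fwd; // forward map, think ValueMap
  std::unordered_map<unsigned, int> Rev; // inverse, built on first use

  int reverseLookup(unsigned VReg) {
    if (Rev.empty())
      for (const auto &P : Fwd)
        Rev[P.second] = P.first; // one pass materializes the inverse
    auto It = Rev.find(VReg);
    return It == Rev.end() ? 0 : It->second; // 0 mirrors a null Value*
  }
};
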
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index cc9b41b4b487..d6171f3177d7 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -394,11 +394,26 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
} else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
MIB.addFPImm(F->getConstantFPValue());
} else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
+ unsigned VReg = R->getReg();
+ MVT OpVT = Op.getSimpleValueType();
+ const TargetRegisterClass *OpRC =
+ TLI->isTypeLegal(OpVT) ? TLI->getRegClassFor(OpVT) : nullptr;
+ const TargetRegisterClass *IIRC =
+ II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF))
+ : nullptr;
+
+ if (OpRC && IIRC && OpRC != IIRC &&
+ TargetRegisterInfo::isVirtualRegister(VReg)) {
+ unsigned NewVReg = MRI->createVirtualRegister(IIRC);
+ BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
+ VReg = NewVReg;
+ }
// Turn additional physreg operands into implicit uses on non-variadic
// instructions. This is used by call and return instructions passing
// arguments in registers.
bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic());
- MIB.addReg(R->getReg(), getImplRegState(Imp));
+ MIB.addReg(VReg, getImplRegState(Imp));
} else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) {
MIB.addRegMask(RM->getRegMask());
} else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
@@ -682,11 +697,15 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
if (SD->getKind() == SDDbgValue::FRAMEIX) {
// Stack address; this needs to be lowered in target-dependent fashion.
// EmitTargetCodeForFrameDebugValue is responsible for allocation.
- return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE))
- .addFrameIndex(SD->getFrameIx())
- .addImm(0)
- .addMetadata(Var)
- .addMetadata(Expr);
+ auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE))
+ .addFrameIndex(SD->getFrameIx());
+ if (SD->isIndirect())
+ // Push [fi + 0] onto the DIExpression stack.
+ FrameMI.addImm(0);
+ else
+ // Push fi onto the DIExpression stack.
+ FrameMI.addReg(0);
+ return FrameMI.addMetadata(Var).addMetadata(Expr);
}
// Otherwise, we're going to create an instruction here.
const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
@@ -705,6 +724,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
else
AddOperand(MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,
/*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false);
+ } else if (SD->getKind() == SDDbgValue::VREG) {
+ MIB.addReg(SD->getVReg(), RegState::Debug);
} else if (SD->getKind() == SDDbgValue::CONST) {
const Value *V = SD->getConst();
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
@@ -736,6 +757,20 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
return &*MIB;
}
+MachineInstr *
+InstrEmitter::EmitDbgLabel(SDDbgLabel *SD) {
+ MDNode *Label = SD->getLabel();
+ DebugLoc DL = SD->getDebugLoc();
+ assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+
+ const MCInstrDesc &II = TII->get(TargetOpcode::DBG_LABEL);
+ MachineInstrBuilder MIB = BuildMI(*MF, DL, II);
+ MIB.addMetadata(Label);
+
+ return &*MIB;
+}
+
/// EmitMachineNode - Generate machine code for a target-specific node and
/// needed dependencies.
///
@@ -807,9 +842,34 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Add result register values for things that are defined by this
// instruction.
- if (NumResults)
+ if (NumResults) {
CreateVirtualRegisters(Node, MIB, II, IsClone, IsCloned, VRBaseMap);
+ // Transfer any IR flags from the SDNode to the MachineInstr
+ MachineInstr *MI = MIB.getInstr();
+ const SDNodeFlags Flags = Node->getFlags();
+ if (Flags.hasNoSignedZeros())
+ MI->setFlag(MachineInstr::MIFlag::FmNsz);
+
+ if (Flags.hasAllowReciprocal())
+ MI->setFlag(MachineInstr::MIFlag::FmArcp);
+
+ if (Flags.hasNoNaNs())
+ MI->setFlag(MachineInstr::MIFlag::FmNoNans);
+
+ if (Flags.hasNoInfs())
+ MI->setFlag(MachineInstr::MIFlag::FmNoInfs);
+
+ if (Flags.hasAllowContract())
+ MI->setFlag(MachineInstr::MIFlag::FmContract);
+
+ if (Flags.hasApproximateFuncs())
+ MI->setFlag(MachineInstr::MIFlag::FmAfn);
+
+ if (Flags.hasAllowReassociation())
+ MI->setFlag(MachineInstr::MIFlag::FmReassoc);
+ }
+
// Emit all of the actual operands of this instruction, adding them to the
// instruction as appropriate.
bool HasOptPRefs = NumDefs > NumResults;
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 8a8a1bbd18f7..701b6368690b 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -113,6 +113,9 @@ public:
MachineInstr *EmitDbgValue(SDDbgValue *SD,
DenseMap<SDValue, unsigned> &VRBaseMap);
+ /// Generate machine instruction for a dbg_label node.
+ MachineInstr *EmitDbgLabel(SDDbgLabel *SD);
+
/// EmitNode - Generate machine code for a node and needed dependencies.
///
void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index bb1dc17b7a1b..2b7ba1ffb309 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -22,7 +22,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -41,6 +40,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -87,11 +87,11 @@ class SelectionDAGLegalize {
const TargetLowering &TLI;
SelectionDAG &DAG;
- /// \brief The set of nodes which have already been legalized. We hold a
+ /// The set of nodes which have already been legalized. We hold a
/// reference to it in order to update as necessary on node deletion.
SmallPtrSetImpl<SDNode *> &LegalizedNodes;
- /// \brief A set of all the nodes updated during legalization.
+ /// A set of all the nodes updated during legalization.
SmallSetVector<SDNode *, 16> *UpdatedNodes;
EVT getSetCCResultType(EVT VT) const {
@@ -107,7 +107,7 @@ public:
: TM(DAG.getTarget()), TLI(DAG.getTargetLoweringInfo()), DAG(DAG),
LegalizedNodes(LegalizedNodes), UpdatedNodes(UpdatedNodes) {}
- /// \brief Legalizes the given operation.
+ /// Legalizes the given operation.
void LegalizeOp(SDNode *Node);
private:
@@ -167,7 +167,7 @@ private:
SDValue NewIntValue) const;
SDValue ExpandFCOPYSIGN(SDNode *Node) const;
SDValue ExpandFABS(SDNode *Node) const;
- SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT,
+ SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT,
const SDLoc &dl);
SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
const SDLoc &dl);
@@ -200,8 +200,8 @@ public:
}
void ReplaceNode(SDNode *Old, SDNode *New) {
- DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
- dbgs() << " with: "; New->dump(&DAG));
+ LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
+ dbgs() << " with: "; New->dump(&DAG));
assert(Old->getNumValues() == New->getNumValues() &&
"Replacing one node with another that produces a different number "
@@ -213,8 +213,8 @@ public:
}
void ReplaceNode(SDValue Old, SDValue New) {
- DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
- dbgs() << " with: "; New->dump(&DAG));
+ LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
+ dbgs() << " with: "; New->dump(&DAG));
DAG.ReplaceAllUsesWith(Old, New);
if (UpdatedNodes)
@@ -223,13 +223,12 @@ public:
}
void ReplaceNode(SDNode *Old, const SDValue *New) {
- DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG));
+ LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG));
DAG.ReplaceAllUsesWith(Old, New);
for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
- DEBUG(dbgs() << (i == 0 ? " with: "
- : " and: ");
- New[i]->dump(&DAG));
+ LLVM_DEBUG(dbgs() << (i == 0 ? " with: " : " and: ");
+ New[i]->dump(&DAG));
if (UpdatedNodes)
UpdatedNodes->insert(New[i].getNode());
}
@@ -408,7 +407,7 @@ SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
}
SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
- DEBUG(dbgs() << "Optimizing float store operations\n");
+ LLVM_DEBUG(dbgs() << "Optimizing float store operations\n");
// Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
// FIXME: We shouldn't do this for TargetConstantFP's.
// FIXME: move this to the DAG Combiner! Note that we can't regress due
@@ -477,7 +476,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
AAMDNodes AAInfo = ST->getAAInfo();
if (!ST->isTruncatingStore()) {
- DEBUG(dbgs() << "Legalizing store operation\n");
+ LLVM_DEBUG(dbgs() << "Legalizing store operation\n");
if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
ReplaceNode(ST, OptStore);
return;
@@ -495,15 +494,15 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
unsigned Align = ST->getAlignment();
const DataLayout &DL = DAG.getDataLayout();
if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
- DEBUG(dbgs() << "Expanding unsupported unaligned store\n");
+ LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n");
SDValue Result = TLI.expandUnalignedStore(ST, DAG);
ReplaceNode(SDValue(ST, 0), Result);
} else
- DEBUG(dbgs() << "Legal store\n");
+ LLVM_DEBUG(dbgs() << "Legal store\n");
break;
}
case TargetLowering::Custom: {
- DEBUG(dbgs() << "Trying custom lowering\n");
+ LLVM_DEBUG(dbgs() << "Trying custom lowering\n");
SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
if (Res && Res != SDValue(Node, 0))
ReplaceNode(SDValue(Node, 0), Res);
@@ -524,7 +523,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
return;
}
- DEBUG(dbgs() << "Legalizing truncating store operations\n");
+ LLVM_DEBUG(dbgs() << "Legalizing truncating store operations\n");
SDValue Value = ST->getValue();
EVT StVT = ST->getMemoryVT();
unsigned StWidth = StVT.getSizeInBits();
@@ -656,7 +655,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD) {
- DEBUG(dbgs() << "Legalizing non-extending load operation\n");
+ LLVM_DEBUG(dbgs() << "Legalizing non-extending load operation\n");
MVT VT = Node->getSimpleValueType(0);
SDValue RVal = SDValue(Node, 0);
SDValue RChain = SDValue(Node, 1);
@@ -706,7 +705,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
return;
}
- DEBUG(dbgs() << "Legalizing extending load operation\n");
+ LLVM_DEBUG(dbgs() << "Legalizing extending load operation\n");
EVT SrcVT = LD->getMemoryVT();
unsigned SrcWidth = SrcVT.getSizeInBits();
unsigned Alignment = LD->getAlignment();
@@ -947,39 +946,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
}
}
-static TargetLowering::LegalizeAction
-getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) {
- unsigned EqOpc;
- switch (Opcode) {
- default: llvm_unreachable("Unexpected FP pseudo-opcode");
- case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
- case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
- case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
- case ISD::STRICT_FMA: EqOpc = ISD::FMA; break;
- case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
- case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
- case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
- case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break;
- case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break;
- case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break;
- case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break;
- case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break;
- case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break;
- }
-
- auto Action = TLI.getOperationAction(EqOpc, VT);
-
- // We don't currently handle Custom or Promote for strict FP pseudo-ops.
- // For now, we just expand for those cases.
- if (Action != TargetLowering::Legal)
- Action = TargetLowering::Expand;
-
- return Action;
-}
-
/// Return a legal replacement for the given operation, with all legal operands.
void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
- DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG));
// Allow illegal target nodes and illegal registers.
if (Node->getOpcode() == ISD::TargetConstant ||
@@ -1043,8 +1012,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::SETCC:
case ISD::BR_CC: {
unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
- Node->getOpcode() == ISD::SETCC ? 2 :
- Node->getOpcode() == ISD::SETCCE ? 3 : 1;
+ Node->getOpcode() == ISD::SETCC ? 2 : 1;
unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType();
ISD::CondCode CCCode =
@@ -1122,6 +1090,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
return;
}
break;
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:
case ISD::STRICT_FPOW:
@@ -1139,8 +1111,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
// is also legal, but if ISD::FSQRT requires expansion then so does
// ISD::STRICT_FSQRT.
- Action = getStrictFPOpcodeAction(TLI, Node->getOpcode(),
- Node->getValueType(0));
+ Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
break;
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
@@ -1202,10 +1174,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
switch (Action) {
case TargetLowering::Legal:
- DEBUG(dbgs() << "Legal node: nothing to do\n");
+ LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
return;
case TargetLowering::Custom:
- DEBUG(dbgs() << "Trying custom legalization\n");
+ LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
// FIXME: The handling for custom lowering with multiple results is
// a complete mess.
if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) {
@@ -1213,7 +1185,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
return;
if (Node->getNumValues() == 1) {
- DEBUG(dbgs() << "Successfully custom legalized node\n");
+ LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
// We can just directly replace this node with the lowered value.
ReplaceNode(SDValue(Node, 0), Res);
return;
@@ -1222,11 +1194,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
SmallVector<SDValue, 8> ResultVals;
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
ResultVals.push_back(Res.getValue(i));
- DEBUG(dbgs() << "Successfully custom legalized node\n");
+ LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
ReplaceNode(Node, ResultVals.data());
return;
}
- DEBUG(dbgs() << "Could not custom legalize node\n");
+ LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
LLVM_FALLTHROUGH;
case TargetLowering::Expand:
if (ExpandNode(Node))
@@ -1623,6 +1595,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
MVT OpVT = LHS.getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
NeedInvert = false;
+ bool NeedSwap = false;
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
default: llvm_unreachable("Unknown condition code action!");
case TargetLowering::Legal:
@@ -1630,23 +1603,37 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
break;
case TargetLowering::Expand: {
ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
- if (TLI.isCondCodeLegal(InvCC, OpVT)) {
+ if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
std::swap(LHS, RHS);
CC = DAG.getCondCode(InvCC);
return true;
}
+ // Swapping operands didn't work. Try inverting the condition.
+ InvCC = getSetCCInverse(CCCode, OpVT.isInteger());
+ if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+ // If inverting the condition is not enough, try swapping operands
+ // on top of it.
+ InvCC = ISD::getSetCCSwappedOperands(InvCC);
+ NeedSwap = true;
+ }
+ if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
+ CC = DAG.getCondCode(InvCC);
+ NeedInvert = true;
+ if (NeedSwap)
+ std::swap(LHS, RHS);
+ return true;
+ }
+
ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
unsigned Opc = 0;
switch (CCCode) {
default: llvm_unreachable("Don't know how to expand this condition!");
case ISD::SETO:
- assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT)
- == TargetLowering::Legal
+ assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT)
&& "If SETO is expanded, SETOEQ must be legal!");
CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
case ISD::SETUO:
- assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT)
- == TargetLowering::Legal
+ assert(TLI.isCondCodeLegal(ISD::SETUNE, OpVT)
&& "If SETUO is expanded, SETUNE must be legal!");
CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break;
case ISD::SETOEQ:
@@ -1676,20 +1663,10 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
case ISD::SETGT:
case ISD::SETGE:
case ISD::SETLT:
- // We only support using the inverted operation, which is computed above
- // and not a different manner of supporting expanding these cases.
- llvm_unreachable("Don't know how to expand this condition!");
case ISD::SETNE:
case ISD::SETEQ:
- // Try inverting the result of the inverse condition.
- InvCC = CCCode == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ;
- if (TLI.isCondCodeLegal(InvCC, OpVT)) {
- CC = DAG.getCondCode(InvCC);
- NeedInvert = true;
- return true;
- }
- // If inverting the condition didn't work then we have no means to expand
- // the condition.
+      // If no combination of inverting the condition and swapping operands
+      // worked, then we have no means to expand the condition.
llvm_unreachable("Don't know how to expand this condition!");
}
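
The new fallback chain rests on two identities: swapping operands turns a predicate into its swapped form, and inverting a predicate lets the target compute the opposite comparison while NeedInvert records the pending negation. On unsigned integers, in ordinary C++ rather than DAG nodes:

#include <cstdint>

// setugt via the swapped predicate: (a >u b) == (b <u a).
bool ugt_via_swap(uint32_t A, uint32_t B) { return B < A; }

// setugt via the inverted predicate: (a >u b) == !(a <=u b); the final
// negation is what the legalizer tracks through NeedInvert.
bool ugt_via_invert(uint32_t A, uint32_t B) { return !(A <= B); }
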
@@ -1996,14 +1973,15 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Op;
Entry.Ty = ArgTy;
- Entry.IsSExt = isSigned;
- Entry.IsZExt = !isSigned;
+ Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned);
+ Entry.IsZExt = !TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned);
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy(DAG.getDataLayout()));
- Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ EVT RetVT = Node->getValueType(0);
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
// By default, the input chain to this libcall is the entry node of the
// function. If the libcall is going to be emitted as a tail call then
@@ -2022,24 +2000,25 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
InChain = TCChain;
TargetLowering::CallLoweringInfo CLI(DAG);
+ bool signExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, isSigned);
CLI.setDebugLoc(SDLoc(Node))
.setChain(InChain)
.setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
std::move(Args))
.setTailCall(isTailCall)
- .setSExtResult(isSigned)
- .setZExtResult(!isSigned)
+ .setSExtResult(signExtend)
+ .setZExtResult(!signExtend)
.setIsPostTypeLegalization(true);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
if (!CallInfo.second.getNode()) {
- DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump());
+ LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump());
// It's a tailcall, return the chain (which is the DAG root).
return DAG.getRoot();
}
- DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump());
+ LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump());
return CallInfo.first;
}
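
shouldSignExtendTypeInLibCall lets the target override the extension choice per type; some 64-bit ABIs, for instance, keep 32-bit values sign-extended in registers regardless of their logical signedness. A hedged sketch of the scalar effect of the two settings, with hypothetical names:

#include <cstdint>

// Widen a 32-bit libcall argument into a 64-bit register slot, honoring the
// target's extension preference rather than the value's logical signedness.
int64_t extendLibcallArg(uint32_t V, bool TargetWantsSExt) {
  return TargetWantsSExt ? (int64_t)(int32_t)V : (int64_t)(uint64_t)V;
}
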
@@ -2325,10 +2304,10 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
EVT DestVT,
const SDLoc &dl) {
// TODO: Should any fast-math-flags be set for the created nodes?
- DEBUG(dbgs() << "Legalizing INT_TO_FP\n");
+ LLVM_DEBUG(dbgs() << "Legalizing INT_TO_FP\n");
if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
- DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double "
- "expansion\n");
+ LLVM_DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double "
+ "expansion\n");
// Get the stack frame index of a 8 byte buffer.
SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
@@ -2393,7 +2372,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
// and in all alternate rounding modes.
// TODO: Generalize this for use with other types.
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) {
- DEBUG(dbgs() << "Converting unsigned i64 to f64\n");
+ LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f64\n");
SDValue TwoP52 =
DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64);
SDValue TwoP84PlusTwoP52 =
@@ -2416,7 +2395,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
// TODO: Generalize this for use with other types.
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
- DEBUG(dbgs() << "Converting unsigned i64 to f32\n");
+ LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f32\n");
// For unsigned conversions, convert them to signed conversions using the
// algorithm from the x86_64 __floatundidf in compiler_rt.
if (!isSigned) {
@@ -2851,7 +2830,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
}
bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
- DEBUG(dbgs() << "Trying to expand node\n");
+ LLVM_DEBUG(dbgs() << "Trying to expand node\n");
SmallVector<SDValue, 8> Results;
SDLoc dl(Node);
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
@@ -2965,12 +2944,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::ZERO_EXTEND:
LHS = DAG.getNode(ISD::AssertZext, dl, OuterType, Res,
DAG.getValueType(AtomicType));
- RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2));
+ RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType);
ExtRes = LHS;
break;
case ISD::ANY_EXTEND:
LHS = DAG.getZeroExtendInReg(Res, dl, AtomicType);
- RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2));
+ RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType);
break;
default:
llvm_unreachable("Invalid atomic op extension");
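
Both corrected cases now use getZeroExtendInReg on the comparand: the value stays in the wide register but is masked down to the width of the atomic memory type, rather than being zero-extended from its (possibly already wide) operand type a second time. The operation itself is just a mask, roughly:

#include <cstdint>

// Zero-extend-in-reg: clear every bit above Bits while keeping the value in
// its wide register type. The guard avoids an undefined shift by 32.
uint32_t zeroExtendInReg(uint32_t V, unsigned Bits) {
  return Bits >= 32 ? V : V & ((1u << Bits) - 1u);
}
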
@@ -3309,7 +3288,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
break;
case ISD::FP_TO_FP16:
- DEBUG(dbgs() << "Legalizing FP_TO_FP16\n");
+ LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n");
if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
SDValue Op = Node->getOperand(0);
MVT SVT = Op.getSimpleValueType();
@@ -3523,15 +3502,25 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::USUBO: {
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
- SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ?
- ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
- LHS, RHS);
+ bool IsAdd = Node->getOpcode() == ISD::UADDO;
+ // If ADD/SUBCARRY is legal, use that instead.
+ unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
+ if (TLI.isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
+ SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
+ SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
+ { LHS, RHS, CarryIn });
+ Results.push_back(SDValue(NodeCarry.getNode(), 0));
+ Results.push_back(SDValue(NodeCarry.getNode(), 1));
+ break;
+ }
+
+ SDValue Sum = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
+ LHS.getValueType(), LHS, RHS);
Results.push_back(Sum);
EVT ResultType = Node->getValueType(1);
EVT SetCCType = getSetCCResultType(Node->getValueType(0));
- ISD::CondCode CC
- = Node->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT;
+ ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC);
Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType));
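
When no carry op is available, the expansion derives the overflow bit from an unsigned compare against the first operand: an addition overflowed iff the sum wrapped below it (SETULT), and a subtraction borrowed iff the difference landed above it (SETUGT). In plain C++:

#include <cstdint>

// uaddo: unsigned add with overflow flag, mirroring the SETULT expansion.
bool uaddo(uint32_t A, uint32_t B, uint32_t &Sum) {
  Sum = A + B;    // wraps modulo 2^32
  return Sum < A; // wrapped iff the result dropped below A
}

// usubo: unsigned subtract with borrow flag, mirroring the SETUGT expansion.
bool usubo(uint32_t A, uint32_t B, uint32_t &Diff) {
  Diff = A - B;
  return Diff > A; // borrowed iff the result jumped above A
}
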
@@ -3682,8 +3671,17 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
unsigned EntrySize =
DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
- Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
- DAG.getConstant(EntrySize, dl, Index.getValueType()));
+ // For power-of-two jumptable entry sizes convert multiplication to a shift.
+ // This transformation needs to be done here since otherwise the MIPS
+  // backend will end up emitting a three-instruction multiply sequence
+  // instead of a single shift, and MSP430 will call a runtime function.
+ if (llvm::isPowerOf2_32(EntrySize))
+ Index = DAG.getNode(
+ ISD::SHL, dl, Index.getValueType(), Index,
+ DAG.getConstant(llvm::Log2_32(EntrySize), dl, Index.getValueType()));
+ else
+ Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
+ DAG.getConstant(EntrySize, dl, Index.getValueType()));
SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(),
Index, Table);
@@ -3699,7 +3697,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr,
TLI.getPICJumpTableRelocBase(Table, DAG));
}
- Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr);
+
+ Tmp1 = TLI.expandIndirectJTBranch(dl, LD.getValue(1), Addr, DAG);
Results.push_back(Tmp1);
break;
}
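
With 4-byte jump-table entries, for example, the byte offset Index * 4 becomes Index << 2, which MIPS and MSP430 would otherwise lower as a real multiply. A scalar equivalent of the selection logic (GCC/Clang builtin assumed):

#include <cstdint>

// Byte offset into a jump table; power-of-two entry sizes take the shift path.
uint64_t jumpTableByteOffset(uint64_t Index, unsigned EntrySize) {
  if (EntrySize && (EntrySize & (EntrySize - 1)) == 0) // power of two?
    return Index << __builtin_ctz(EntrySize);          // shift by log2
  return Index * EntrySize;
}
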
@@ -3718,7 +3717,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (Tmp2.isUndef() ||
(Tmp2.getOpcode() == ISD::AND &&
isa<ConstantSDNode>(Tmp2.getOperand(1)) &&
- dyn_cast<ConstantSDNode>(Tmp2.getOperand(1))->getZExtValue() == 1))
+ cast<ConstantSDNode>(Tmp2.getOperand(1))->getZExtValue() == 1))
Tmp3 = Tmp2;
else
Tmp3 = DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
@@ -3757,7 +3756,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// illegal; expand it into a SELECT_CC.
EVT VT = Node->getValueType(0);
int TrueValue;
- switch (TLI.getBooleanContents(Tmp1->getValueType(0))) {
+ switch (TLI.getBooleanContents(Tmp1.getValueType())) {
case TargetLowering::ZeroOrOneBooleanContent:
case TargetLowering::UndefinedBooleanContent:
TrueValue = 1;
@@ -3782,7 +3781,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue CC = Node->getOperand(4);
ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get();
- if (TLI.isCondCodeLegal(CCOp, Tmp1.getSimpleValueType())) {
+ if (TLI.isCondCodeLegalOrCustom(CCOp, Tmp1.getSimpleValueType())) {
// If the condition code is legal, then we need to expand this
// node using SETCC and SELECT.
EVT CmpVT = Tmp1.getValueType();
@@ -3803,7 +3802,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// version (or vice versa).
ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp,
Tmp1.getValueType().isInteger());
- if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) {
+ if (TLI.isCondCodeLegalOrCustom(InvCC, Tmp1.getSimpleValueType())) {
// Use the new condition code and swap true and false
Legalized = true;
Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC);
@@ -3811,7 +3810,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// If The inverse is not legal, then try to swap the arguments using
// the inverse condition code.
ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC);
- if (TLI.isCondCodeLegal(SwapInvCC, Tmp1.getSimpleValueType())) {
+ if (TLI.isCondCodeLegalOrCustom(SwapInvCC, Tmp1.getSimpleValueType())) {
// The swapped inverse condition is legal, so swap true and false,
// lhs and rhs.
Legalized = true;
@@ -3904,6 +3903,46 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
ReplaceNode(SDValue(Node, 0), Result);
break;
}
+ case ISD::ROTL:
+ case ISD::ROTR: {
+ bool IsLeft = Node->getOpcode() == ISD::ROTL;
+ SDValue Op0 = Node->getOperand(0), Op1 = Node->getOperand(1);
+ EVT ResVT = Node->getValueType(0);
+ EVT OpVT = Op0.getValueType();
+ assert(OpVT == ResVT &&
+ "The result and the operand types of rotate should match");
+ EVT ShVT = Op1.getValueType();
+ SDValue Width = DAG.getConstant(OpVT.getScalarSizeInBits(), dl, ShVT);
+
+ // If a rotate in the other direction is legal, use it.
+ unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
+ if (TLI.isOperationLegal(RevRot, ResVT)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1);
+ Results.push_back(DAG.getNode(RevRot, dl, ResVT, Op0, Sub));
+ break;
+ }
+
+ // Otherwise,
+ // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
+ // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
+ //
+ assert(isPowerOf2_32(OpVT.getScalarSizeInBits()) &&
+ "Expecting the type bitwidth to be a power of 2");
+ unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
+ unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
+ SDValue Width1 = DAG.getNode(ISD::SUB, dl, ShVT,
+ Width, DAG.getConstant(1, dl, ShVT));
+ SDValue NegOp1 = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1);
+ SDValue And0 = DAG.getNode(ISD::AND, dl, ShVT, Op1, Width1);
+ SDValue And1 = DAG.getNode(ISD::AND, dl, ShVT, NegOp1, Width1);
+
+ SDValue Or = DAG.getNode(ISD::OR, dl, ResVT,
+ DAG.getNode(ShOpc, dl, ResVT, Op0, And0),
+ DAG.getNode(HsOpc, dl, ResVT, Op0, And1));
+ Results.push_back(Or);
+ break;
+ }
+
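
Because the bit width is a power of two, ANDing both shift amounts with width-1 keeps them in range even for rotate amounts of 0 or >= width, so the OR of the two shifts is well defined for every input. The same formula in C++:

#include <cstdint>

// rotl/rotr via the (shl | srl) expansion above; the & 31 masks mirror the
// AND with (width - 1) and avoid the undefined shift-by-32 case.
uint32_t rotl32(uint32_t X, uint32_t C) {
  return (X << (C & 31)) | (X >> ((32 - C) & 31));
}
uint32_t rotr32(uint32_t X, uint32_t C) {
  return (X >> (C & 31)) | (X << ((32 - C) & 31));
}
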
case ISD::GLOBAL_OFFSET_TABLE:
case ISD::GlobalAddress:
case ISD::GlobalTLSAddress:
@@ -3919,19 +3958,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Replace the original node with the legalized result.
if (Results.empty()) {
- DEBUG(dbgs() << "Cannot expand node\n");
+ LLVM_DEBUG(dbgs() << "Cannot expand node\n");
return false;
}
- DEBUG(dbgs() << "Succesfully expanded node\n");
+  LLVM_DEBUG(dbgs() << "Successfully expanded node\n");
ReplaceNode(Node, Results.data());
return true;
}
void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
- DEBUG(dbgs() << "Trying to convert node to libcall\n");
+ LLVM_DEBUG(dbgs() << "Trying to convert node to libcall\n");
SmallVector<SDValue, 8> Results;
SDLoc dl(Node);
+ // FIXME: Check flags on the node to see if we can use a finite call.
+ bool CanUseFiniteLibCall = TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath;
unsigned Opc = Node->getOpcode();
switch (Opc) {
case ISD::ATOMIC_FENCE: {
@@ -3960,6 +4001,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_CLR:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_NAND:
@@ -4026,33 +4068,68 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
break;
case ISD::FLOG:
case ISD::STRICT_FLOG:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
- RTLIB::LOG_F80, RTLIB::LOG_F128,
- RTLIB::LOG_PPCF128));
+ if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log_finite))
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32,
+ RTLIB::LOG_FINITE_F64,
+ RTLIB::LOG_FINITE_F80,
+ RTLIB::LOG_FINITE_F128,
+ RTLIB::LOG_FINITE_PPCF128));
+ else
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128));
break;
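
The _finite entry points are glibc's no-error-handling math routines; they may assume finite arguments and results, which is exactly what the NoInfsFPMath/NoNaNsFPMath options promise. A hedged illustration of the choice (the declaration is ours; the symbol is provided by libm on targets that have it):

extern "C" double __log_finite(double);
extern "C" double log(double);

// Pick the finite variant only when fast-math rules out NaNs and infinities.
double lower_log(double X, bool CanUseFiniteLibCall) {
  return CanUseFiniteLibCall ? __log_finite(X) : log(X);
}
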
case ISD::FLOG2:
case ISD::STRICT_FLOG2:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
- RTLIB::LOG2_F80, RTLIB::LOG2_F128,
- RTLIB::LOG2_PPCF128));
+ if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log2_finite))
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32,
+ RTLIB::LOG2_FINITE_F64,
+ RTLIB::LOG2_FINITE_F80,
+ RTLIB::LOG2_FINITE_F128,
+ RTLIB::LOG2_FINITE_PPCF128));
+ else
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128));
break;
case ISD::FLOG10:
case ISD::STRICT_FLOG10:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
- RTLIB::LOG10_F80, RTLIB::LOG10_F128,
- RTLIB::LOG10_PPCF128));
+ if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log10_finite))
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32,
+ RTLIB::LOG10_FINITE_F64,
+ RTLIB::LOG10_FINITE_F80,
+ RTLIB::LOG10_FINITE_F128,
+ RTLIB::LOG10_FINITE_PPCF128));
+ else
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128));
break;
case ISD::FEXP:
case ISD::STRICT_FEXP:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
- RTLIB::EXP_F80, RTLIB::EXP_F128,
- RTLIB::EXP_PPCF128));
+ if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp_finite))
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32,
+ RTLIB::EXP_FINITE_F64,
+ RTLIB::EXP_FINITE_F80,
+ RTLIB::EXP_FINITE_F128,
+ RTLIB::EXP_FINITE_PPCF128));
+ else
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128));
break;
case ISD::FEXP2:
case ISD::STRICT_FEXP2:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
- RTLIB::EXP2_F80, RTLIB::EXP2_F128,
- RTLIB::EXP2_PPCF128));
+ if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp2_finite))
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32,
+ RTLIB::EXP2_FINITE_F64,
+ RTLIB::EXP2_FINITE_F80,
+ RTLIB::EXP2_FINITE_F128,
+ RTLIB::EXP2_FINITE_PPCF128));
+ else
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128));
break;
case ISD::FTRUNC:
Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
@@ -4098,9 +4175,16 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
break;
case ISD::FPOW:
case ISD::STRICT_FPOW:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
- RTLIB::POW_F80, RTLIB::POW_F128,
- RTLIB::POW_PPCF128));
+ if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_pow_finite))
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32,
+ RTLIB::POW_FINITE_F64,
+ RTLIB::POW_FINITE_F80,
+ RTLIB::POW_FINITE_F128,
+ RTLIB::POW_FINITE_PPCF128));
+ else
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128));
break;
case ISD::FDIV:
Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
@@ -4184,10 +4268,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
// Replace the original node with the legalized result.
if (!Results.empty()) {
- DEBUG(dbgs() << "Successfully converted node to libcall\n");
+ LLVM_DEBUG(dbgs() << "Successfully converted node to libcall\n");
ReplaceNode(Node, Results.data());
} else
- DEBUG(dbgs() << "Could not convert node to libcall\n");
+ LLVM_DEBUG(dbgs() << "Could not convert node to libcall\n");
}
// Determine the vector type to use in place of an original scalar element when
@@ -4201,7 +4285,7 @@ static MVT getPromotedVectorElementType(const TargetLowering &TLI,
}
void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
- DEBUG(dbgs() << "Trying to promote node\n");
+ LLVM_DEBUG(dbgs() << "Trying to promote node\n");
SmallVector<SDValue, 8> Results;
MVT OVT = Node->getSimpleValueType(0);
if (Node->getOpcode() == ISD::UINT_TO_FP ||
@@ -4254,7 +4338,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
ISD::SRL, dl, NVT, Tmp1,
DAG.getConstant(DiffBits, dl,
TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
- Results.push_back(Tmp1);
+
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
break;
}
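
The added TRUNCATE matters because after promotion the swapped bytes sit in the high end of the wider register: the SRL moves them back down by DiffBits, and the result must then be returned at the original width OVT, not the promoted one. The i16-in-i32 case, concretely:

#include <cstdint>

// bswap of an i16 promoted to i32: swap the wide value, shift the bytes back
// down by DiffBits (32 - 16), then truncate to the original width.
uint16_t bswap16_via_i32(uint16_t X) {
  uint32_t Wide = __builtin_bswap32(X); // bytes now occupy the top half
  return (uint16_t)(Wide >> 16);        // SRL by DiffBits, then truncate
}
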
case ISD::FP_TO_UINT:
@@ -4638,10 +4723,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
// Replace the original node with the legalized result.
if (!Results.empty()) {
- DEBUG(dbgs() << "Successfully promoted node\n");
+ LLVM_DEBUG(dbgs() << "Successfully promoted node\n");
ReplaceNode(Node, Results.data());
} else
- DEBUG(dbgs() << "Could not promote node\n");
+ LLVM_DEBUG(dbgs() << "Could not promote node\n");
}
/// This is the entry point for the file.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index e28a3aa47ca3..b0ae1e0399fb 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -47,8 +47,8 @@ static RTLIB::Libcall GetFPLibCall(EVT VT,
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
- DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue R = SDValue();
switch (N->getOpcode()) {
@@ -738,8 +738,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
- DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue Res = SDValue();
switch (N->getOpcode()) {
@@ -1039,7 +1039,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
/// have invalid operands or may have other results that need promotion, we just
/// know that (at least) one result needs expansion.
void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
- DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");
SDValue Lo, Hi;
Lo = Hi = SDValue();
@@ -1538,7 +1538,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
/// types of the node are known to be legal, but other operands of the node may
/// need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
- DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
SDValue Res = SDValue();
// See if the target wants to custom expand this node.
@@ -1658,18 +1658,6 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
EVT RVT = N->getValueType(0);
SDLoc dl(N);
- // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
- // PPC (the libcall is not available). FIXME: Do this in a less hacky way.
- if (RVT == MVT::i32) {
- assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
- "Logic only correct for ppcf128!");
- SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128,
- N->getOperand(0), DAG.getValueType(MVT::f64));
- Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res,
- DAG.getIntPtrConstant(1, dl));
- return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
- }
-
RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first;
@@ -1679,31 +1667,6 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
EVT RVT = N->getValueType(0);
SDLoc dl(N);
- // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
- // PPC (the libcall is not available). FIXME: Do this in a less hacky way.
- if (RVT == MVT::i32) {
- assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
- "Logic only correct for ppcf128!");
- const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
- APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
- SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
- // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
- // FIXME: generated code sucks.
- // TODO: Are there fast-math-flags to propagate to this FSUB?
- return DAG.getSelectCC(dl, N->getOperand(0), Tmp,
- DAG.getNode(ISD::ADD, dl, MVT::i32,
- DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
- DAG.getNode(ISD::FSUB, dl,
- MVT::ppcf128,
- N->getOperand(0),
- Tmp)),
- DAG.getConstant(0x80000000, dl,
- MVT::i32)),
- DAG.getNode(ISD::FP_TO_SINT, dl,
- MVT::i32, N->getOperand(0)),
- ISD::SETGE);
- }
-
RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0),
@@ -2139,13 +2102,12 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) {
// Load the value as an integer value with the same number of bits.
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
- auto MMOFlags =
- L->getMemOperand()->getFlags() &
- ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), IVT,
SDLoc(N), L->getChain(), L->getBasePtr(),
L->getOffset(), L->getPointerInfo(), IVT,
- L->getAlignment(), MMOFlags, L->getAAInfo());
+ L->getAlignment(),
+ L->getMemOperand()->getFlags(),
+ L->getAAInfo());
// Legalize the chain result by replacing uses of the old value chain with the
// new one
ReplaceValueWith(SDValue(N, 1), newL.getValue(1));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 29f0bb475b08..63a1ea13a5f5 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -36,12 +36,13 @@ using namespace llvm;
/// may also have invalid operands or may have other results that need
/// expansion, we just know that (at least) one result needs promotion.
void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
- DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue Res = SDValue();
// See if the target wants to custom expand this node.
if (CustomLowerNode(N, N->getValueType(ResNo), true)) {
- DEBUG(dbgs() << "Node has been custom expanded, done\n");
+ LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n");
return;
}
@@ -146,6 +147,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_CLR:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_NAND:
@@ -501,7 +503,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
SDLoc dl(N);
SDValue Ops[] = {N->getChain(), ExtSrc0, N->getMask(), N->getBasePtr(),
- N->getIndex()};
+                    N->getIndex(), N->getScale()};
SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
N->getMemOperand());
@@ -586,43 +588,39 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
- EVT SVT = getSetCCResultType(N->getOperand(0).getValueType());
-
+ EVT InVT = N->getOperand(0).getValueType();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- // Only use the result of getSetCCResultType if it is legal,
- // otherwise just use the promoted result type (NVT).
- if (!TLI.isTypeLegal(SVT))
- SVT = NVT;
+ EVT SVT = getSetCCResultType(InVT);
+
+  // If we got back a type that needs to be promoted, this likely means that
+  // the input type also needs to be promoted. So get the promoted type for
+  // the input and try the query again.
+ if (getTypeAction(SVT) == TargetLowering::TypePromoteInteger) {
+ if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) {
+ InVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+ SVT = getSetCCResultType(InVT);
+ } else {
+ // Input type isn't promoted, just use the default promoted type.
+ SVT = NVT;
+ }
+ }
SDLoc dl(N);
assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() &&
"Vector compare must return a vector result!");
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- if (LHS.getValueType() != RHS.getValueType()) {
- if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger &&
- !LHS.getValueType().isVector())
- LHS = GetPromotedInteger(LHS);
- if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger &&
- !RHS.getValueType().isVector())
- RHS = GetPromotedInteger(RHS);
- }
-
// Get the SETCC result using the canonical SETCC type.
- SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, LHS, RHS,
- N->getOperand(2));
+ SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
// Convert to the expected type.
return DAG.getSExtOrTrunc(SetCC, dl, NVT);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
- SDValue LHS = N->getOperand(0);
+ SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
- if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger)
- LHS = GetPromotedInteger(LHS);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
return DAG.getNode(ISD::SHL, SDLoc(N), LHS.getValueType(), LHS, RHS);
@@ -661,22 +659,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
// The input value must be properly sign extended.
- if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger)
- LHS = SExtPromotedInteger(LHS);
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
return DAG.getNode(ISD::SRA, SDLoc(N), LHS.getValueType(), LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
// The input value must be properly zero extended.
- if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger)
- LHS = ZExtPromotedInteger(LHS);
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS);
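For context on the shift hunks above: a promoted shift only produces the right low bits when the input extension matches the shift's semantics. A small standalone C++ sketch (not the DAG API) of why SRA uses SExtPromotedInteger while SRL uses ZExtPromotedInteger:

#include <cassert>
#include <cstdint>

int8_t sra_i8_via_i32(int8_t V, unsigned Amt) {
  int32_t Ext = (int32_t)V;      // SExtPromotedInteger
  return (int8_t)(Ext >> Amt);   // arithmetic shift at the promoted width
}

uint8_t srl_i8_via_i32(uint8_t V, unsigned Amt) {
  uint32_t Ext = (uint32_t)V;    // ZExtPromotedInteger
  return (uint8_t)(Ext >> Amt);  // logical shift at the promoted width
}

int main() {
  assert(sra_i8_via_i32(-128, 4) == -8);   // sign bits shifted in
  assert(srl_i8_via_i32(0x80, 4) == 0x08); // zeros shifted in
  return 0;
}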
@@ -904,11 +898,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
/// result types of the node are known to be legal, but other operands of the
/// node may need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
- DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue Res = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
- DEBUG(dbgs() << "Node has been custom lowered, done\n");
+ LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n");
return false;
}
@@ -1001,11 +996,11 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
// than the width of NewLHS/NewRHS, we can avoid inserting a real truncate
// instruction, which would eventually be redundant.
unsigned OpLEffectiveBits =
- OpL.getValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1;
+ OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1;
unsigned OpREffectiveBits =
- OpR.getValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1;
- if (OpLEffectiveBits <= NewLHS.getValueSizeInBits() &&
- OpREffectiveBits <= NewRHS.getValueSizeInBits()) {
+ OpR.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1;
+ if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() &&
+ OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) {
NewLHS = OpL;
NewRHS = OpR;
} else {
@@ -1356,7 +1351,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) {
/// have invalid operands or may have other results that need promotion, we just
/// know that (at least) one result needs expansion.
void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
- DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue Lo, Hi;
Lo = Hi = SDValue();
@@ -1413,6 +1409,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_CLR:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_NAND:
@@ -2893,7 +2890,8 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
/// result types of the node are known to be legal, but other operands of the
/// node may need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
- DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue Res = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
@@ -2915,7 +2913,6 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
- case ISD::SETCCE: Res = ExpandIntOp_SETCCE(N); break;
case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break;
case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
@@ -3051,15 +3048,14 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
return;
}
- // Lower with SETCCE or SETCCCARRY if the target supports it.
+ // Lower with SETCCCARRY if the target supports it.
EVT HiVT = LHSHi.getValueType();
EVT ExpandVT = TLI.getTypeToExpandTo(*DAG.getContext(), HiVT);
bool HasSETCCCARRY = TLI.isOperationLegalOrCustom(ISD::SETCCCARRY, ExpandVT);
// FIXME: Make all targets support this, then remove the other lowering.
- if (HasSETCCCARRY ||
- TLI.getOperationAction(ISD::SETCCE, ExpandVT) == TargetLowering::Custom) {
- // SETCCE/SETCCCARRY can detect < and >= directly. For > and <=, flip
+ if (HasSETCCCARRY) {
+ // SETCCCARRY can detect < and >= directly. For > and <=, flip
// operands and condition code.
bool FlipOperands = false;
switch (CCCode) {
@@ -3074,17 +3070,15 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
std::swap(LHSHi, RHSHi);
}
// Perform a wide subtraction, feeding the carry from the low part into
- // SETCCE/SETCCCARRY. The SETCCE/SETCCCARRY operation is essentially
- // looking at the high part of the result of LHS - RHS. It is negative
- // iff LHS < RHS. It is zero or positive iff LHS >= RHS.
+ // SETCCCARRY. The SETCCCARRY operation is essentially looking at the high
+ // part of the result of LHS - RHS. It is negative iff LHS < RHS. It is
+ // zero or positive iff LHS >= RHS.
EVT LoVT = LHSLo.getValueType();
- SDVTList VTList = DAG.getVTList(
- LoVT, HasSETCCCARRY ? getSetCCResultType(LoVT) : MVT::Glue);
- SDValue LowCmp = DAG.getNode(HasSETCCCARRY ? ISD::USUBO : ISD::SUBC, dl,
- VTList, LHSLo, RHSLo);
- SDValue Res = DAG.getNode(HasSETCCCARRY ? ISD::SETCCCARRY : ISD::SETCCE, dl,
- getSetCCResultType(HiVT), LHSHi, RHSHi,
- LowCmp.getValue(1), DAG.getCondCode(CCCode));
+ SDVTList VTList = DAG.getVTList(LoVT, getSetCCResultType(LoVT));
+ SDValue LowCmp = DAG.getNode(ISD::USUBO, dl, VTList, LHSLo, RHSLo);
+ SDValue Res = DAG.getNode(ISD::SETCCCARRY, dl, getSetCCResultType(HiVT),
+ LHSHi, RHSHi, LowCmp.getValue(1),
+ DAG.getCondCode(CCCode));
NewLHS = Res;
NewRHS = SDValue();
return;
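A standalone C++ sketch of the USUBO + SETCCCARRY expansion described in the comment above, assuming 32-bit halves of a signed 64-bit compare (illustrative only, not the DAG API):

#include <cassert>
#include <cstdint>

// Subtract the low halves, feed the borrow into the high subtraction, and
// test the sign of the exact high part, which is negative iff LHS < RHS.
bool wideSLT(uint32_t LHSLo, int32_t LHSHi, uint32_t RHSLo, int32_t RHSHi) {
  bool Borrow = LHSLo < RHSLo;                           // USUBO on the low part
  int64_t Hi = (int64_t)LHSHi - (int64_t)RHSHi - Borrow; // SETCCCARRY's view
  return Hi < 0;
}

int main() {
  int64_t A = -5, B = 3;
  assert(wideSLT((uint32_t)A, (int32_t)(A >> 32),
                 (uint32_t)B, (int32_t)(B >> 32)) == (A < B));
  int64_t C = 0x100000000LL, D = 1;
  assert(wideSLT((uint32_t)C, (int32_t)(C >> 32),
                 (uint32_t)D, (int32_t)(D >> 32)) == (C < D));
  return 0;
}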
@@ -3152,24 +3146,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
DAG.UpdateNodeOperands(N, NewLHS, NewRHS, DAG.getCondCode(CCCode)), 0);
}
-SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) {
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- SDValue Carry = N->getOperand(2);
- SDValue Cond = N->getOperand(3);
- SDLoc dl = SDLoc(N);
-
- SDValue LHSLo, LHSHi, RHSLo, RHSHi;
- GetExpandedInteger(LHS, LHSLo, LHSHi);
- GetExpandedInteger(RHS, RHSLo, RHSHi);
-
- // Expand to a SUBE for the low part and a smaller SETCCE for the high.
- SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue);
- SDValue LowCmp = DAG.getNode(ISD::SUBE, dl, VTList, LHSLo, RHSLo, Carry);
- return DAG.getNode(ISD::SETCCE, dl, N->getValueType(0), LHSHi, RHSHi,
- LowCmp.getValue(1), Cond);
-}
-
SDValue DAGTypeLegalizer::ExpandIntOp_SETCCCARRY(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
@@ -3497,21 +3473,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
assert(NumElem * NumOperands == NumOutElem &&
"Unexpected number of elements");
- // If the input type is legal and we can promote it to a legal type with the
- // same element size, go ahead do that to create a new concat.
- if (getTypeAction(N->getOperand(0).getValueType()) ==
- TargetLowering::TypeLegal) {
- EVT InPromotedTy = EVT::getVectorVT(*DAG.getContext(), OutElemTy, NumElem);
- if (TLI.isTypeLegal(InPromotedTy)) {
- SmallVector<SDValue, 8> Ops(NumOperands);
- for (unsigned i = 0; i < NumOperands; ++i) {
- Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, InPromotedTy,
- N->getOperand(i));
- }
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, NOutVT, Ops);
- }
- }
-
// Take the elements from the first vector.
SmallVector<SDValue, 8> Ops(NumOutElem);
for (unsigned i = 0; i < NumOperands; ++i) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 4438ee7878b8..a9f144c06e9a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -84,9 +84,11 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
SDValue Res(&Node, i);
EVT VT = Res.getValueType();
bool Failed = false;
+      // Look up the id without creating a new entry in the map.
+ auto ResId = (ValueToIdMap.count(Res)) ? ValueToIdMap[Res] : 0;
unsigned Mapped = 0;
- if (ReplacedValues.find(Res) != ReplacedValues.end()) {
+ if (ResId && (ReplacedValues.find(ResId) != ReplacedValues.end())) {
Mapped |= 1;
// Check that remapped values are only used by nodes marked NewNode.
for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end();
@@ -97,30 +99,32 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
// Check that the final result of applying ReplacedValues is not
// marked NewNode.
- SDValue NewVal = ReplacedValues[Res];
- DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(NewVal);
+ auto NewValId = ReplacedValues[ResId];
+ auto I = ReplacedValues.find(NewValId);
while (I != ReplacedValues.end()) {
- NewVal = I->second;
- I = ReplacedValues.find(NewVal);
+ NewValId = I->second;
+ I = ReplacedValues.find(NewValId);
}
+ SDValue NewVal = getSDValue(NewValId);
+ (void)NewVal;
assert(NewVal.getNode()->getNodeId() != NewNode &&
"ReplacedValues maps to a new node!");
}
- if (PromotedIntegers.find(Res) != PromotedIntegers.end())
+ if (ResId && PromotedIntegers.find(ResId) != PromotedIntegers.end())
Mapped |= 2;
- if (SoftenedFloats.find(Res) != SoftenedFloats.end())
+ if (ResId && SoftenedFloats.find(ResId) != SoftenedFloats.end())
Mapped |= 4;
- if (ScalarizedVectors.find(Res) != ScalarizedVectors.end())
+ if (ResId && ScalarizedVectors.find(ResId) != ScalarizedVectors.end())
Mapped |= 8;
- if (ExpandedIntegers.find(Res) != ExpandedIntegers.end())
+ if (ResId && ExpandedIntegers.find(ResId) != ExpandedIntegers.end())
Mapped |= 16;
- if (ExpandedFloats.find(Res) != ExpandedFloats.end())
+ if (ResId && ExpandedFloats.find(ResId) != ExpandedFloats.end())
Mapped |= 32;
- if (SplitVectors.find(Res) != SplitVectors.end())
+ if (ResId && SplitVectors.find(ResId) != SplitVectors.end())
Mapped |= 64;
- if (WidenedVectors.find(Res) != WidenedVectors.end())
+ if (ResId && WidenedVectors.find(ResId) != WidenedVectors.end())
Mapped |= 128;
- if (PromotedFloats.find(Res) != PromotedFloats.end())
+ if (ResId && PromotedFloats.find(ResId) != PromotedFloats.end())
Mapped |= 256;
if (Node.getNodeId() != Processed) {
@@ -224,9 +228,9 @@ bool DAGTypeLegalizer::run() {
assert(N->getNodeId() == ReadyToProcess &&
"Node should be ready if on worklist!");
- DEBUG(dbgs() << "Legalizing node: "; N->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "Legalizing node: "; N->dump(&DAG));
if (IgnoreNodeResults(N)) {
- DEBUG(dbgs() << "Ignoring node results\n");
+ LLVM_DEBUG(dbgs() << "Ignoring node results\n");
goto ScanOperands;
}
@@ -234,11 +238,11 @@ bool DAGTypeLegalizer::run() {
// types are illegal.
for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
EVT ResultVT = N->getValueType(i);
- DEBUG(dbgs() << "Analyzing result type: " <<
- ResultVT.getEVTString() << "\n");
+ LLVM_DEBUG(dbgs() << "Analyzing result type: " << ResultVT.getEVTString()
+ << "\n");
switch (getTypeAction(ResultVT)) {
case TargetLowering::TypeLegal:
- DEBUG(dbgs() << "Legal result type\n");
+ LLVM_DEBUG(dbgs() << "Legal result type\n");
break;
// The following calls must take care of *all* of the node's results,
// not just the illegal result they were passed (this includes results
@@ -296,11 +300,11 @@ ScanOperands:
continue;
const auto Op = N->getOperand(i);
- DEBUG(dbgs() << "Analyzing operand: "; Op.dump(&DAG));
+ LLVM_DEBUG(dbgs() << "Analyzing operand: "; Op.dump(&DAG));
EVT OpVT = Op.getValueType();
switch (getTypeAction(OpVT)) {
case TargetLowering::TypeLegal:
- DEBUG(dbgs() << "Legal operand\n");
+ LLVM_DEBUG(dbgs() << "Legal operand\n");
continue;
// The following calls must either replace all of the node's results
// using ReplaceValueWith, and return "false"; or update the node's
@@ -370,7 +374,8 @@ ScanOperands:
}
if (i == NumOperands) {
- DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG);
+ dbgs() << "\n");
}
}
NodeDone:
@@ -490,9 +495,6 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed)
return N;
- // Remove any stale map entries.
- ExpungeNode(N);
-
// Okay, we know that this node is new. Recursively walk all of its operands
// to see if they are new also. The depth of this walk is bounded by the size
// of the new tree that was constructed (usually 2-3 nodes), so we don't worry
@@ -543,7 +545,6 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
// to remap the operands, since they are the same as the operands we
// remapped above.
N = M;
- ExpungeNode(N);
}
}
@@ -564,100 +565,25 @@ void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) {
RemapValue(Val);
}
-/// If N has a bogus mapping in ReplacedValues, eliminate it.
-/// This can occur when a node is deleted then reallocated as a new node -
-/// the mapping in ReplacedValues applies to the deleted node, not the new
-/// one.
-/// The only map that can have a deleted node as a source is ReplacedValues.
-/// Other maps can have deleted nodes as targets, but since their looked-up
-/// values are always immediately remapped using RemapValue, resulting in a
-/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue
-/// always performs correct mappings. In order to keep the mapping correct,
-/// ExpungeNode should be called on any new nodes *before* adding them as
-/// either source or target to ReplacedValues (which typically means calling
-/// Expunge when a new node is first seen, since it may no longer be marked
-/// NewNode by the time it is added to ReplacedValues).
-void DAGTypeLegalizer::ExpungeNode(SDNode *N) {
- if (N->getNodeId() != NewNode)
- return;
-
- // If N is not remapped by ReplacedValues then there is nothing to do.
- unsigned i, e;
- for (i = 0, e = N->getNumValues(); i != e; ++i)
- if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end())
- break;
-
- if (i == e)
- return;
-
- // Remove N from all maps - this is expensive but rare.
-
- for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(),
- E = PromotedIntegers.end(); I != E; ++I) {
- assert(I->first.getNode() != N);
- RemapValue(I->second);
- }
-
- for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(),
- E = SoftenedFloats.end(); I != E; ++I) {
- assert(I->first.getNode() != N);
- RemapValue(I->second);
- }
-
- for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(),
- E = ScalarizedVectors.end(); I != E; ++I) {
- assert(I->first.getNode() != N);
- RemapValue(I->second);
- }
-
- for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(),
- E = WidenedVectors.end(); I != E; ++I) {
- assert(I->first.getNode() != N);
- RemapValue(I->second);
- }
-
- for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
- I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){
- assert(I->first.getNode() != N);
- RemapValue(I->second.first);
- RemapValue(I->second.second);
- }
-
- for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
- I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) {
- assert(I->first.getNode() != N);
- RemapValue(I->second.first);
- RemapValue(I->second.second);
- }
-
- for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
- I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) {
- assert(I->first.getNode() != N);
- RemapValue(I->second.first);
- RemapValue(I->second.second);
- }
-
- for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(),
- E = ReplacedValues.end(); I != E; ++I)
- RemapValue(I->second);
-
- for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
- ReplacedValues.erase(SDValue(N, i));
-}
-
/// If the specified value was already legalized to another value,
/// replace it by that value.
-void DAGTypeLegalizer::RemapValue(SDValue &N) {
- DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N);
+void DAGTypeLegalizer::RemapValue(SDValue &V) {
+ auto Id = getTableId(V);
+ V = getSDValue(Id);
+}
+
+void DAGTypeLegalizer::RemapId(TableId &Id) {
+ auto I = ReplacedValues.find(Id);
if (I != ReplacedValues.end()) {
+ assert(Id != I->second && "Id is mapped to itself.");
// Use path compression to speed up future lookups if values get multiply
// replaced with other values.
- RemapValue(I->second);
- N = I->second;
+ RemapId(I->second);
+ Id = I->second;
- // Note that it is possible to have N.getNode()->getNodeId() == NewNode at
- // this point because it is possible for a node to be put in the map before
- // being processed.
+    // Note that for N = IdToValueMap[Id] it is possible to have
+    // N.getNode()->getNodeId() == NewNode at this point because it is possible
+    // for a node to be put in the map before being processed.
}
}
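RemapId is effectively union-find's find with path compression over the id table. A minimal standalone sketch of the same pattern, using std::unordered_map in place of LLVM's SmallDenseMap:

#include <cassert>
#include <unordered_map>

using TableId = unsigned;

// Chains such as 1 -> 2 -> 3 are flattened so later lookups resolve in one
// step, just as the recursive RemapId call rewrites I->second.
void remapId(std::unordered_map<TableId, TableId> &ReplacedValues,
             TableId &Id) {
  auto I = ReplacedValues.find(Id);
  if (I != ReplacedValues.end()) {
    remapId(ReplacedValues, I->second); // compress the rest of the chain
    Id = I->second;
  }
}

int main() {
  std::unordered_map<TableId, TableId> RV{{1, 2}, {2, 3}};
  TableId Id = 1;
  remapId(RV, Id);
  assert(Id == 3 && RV[1] == 3); // chain flattened by compression
  return 0;
}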
@@ -714,19 +640,22 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
assert(From.getNode() != To.getNode() && "Potential legalization loop!");
// If expansion produced new nodes, make sure they are properly marked.
- ExpungeNode(From.getNode());
- AnalyzeNewValue(To); // Expunges To.
+ AnalyzeNewValue(To);
// Anything that used the old node should now use the new one. Note that this
// can potentially cause recursive merging.
SmallSetVector<SDNode*, 16> NodesToAnalyze;
NodeUpdateListener NUL(*this, NodesToAnalyze);
do {
- DAG.ReplaceAllUsesOfValueWith(From, To);
- // The old node may still be present in a map like ExpandedIntegers or
- // PromotedIntegers. Inform maps about the replacement.
- ReplacedValues[From] = To;
+ // The old node may be present in a map like ExpandedIntegers or
+ // PromotedIntegers. Inform maps about the replacement.
+ auto FromId = getTableId(From);
+ auto ToId = getTableId(To);
+
+ if (FromId != ToId)
+ ReplacedValues[FromId] = ToId;
+ DAG.ReplaceAllUsesOfValueWith(From, To);
// Process the list of nodes that need to be reanalyzed.
while (!NodesToAnalyze.empty()) {
@@ -751,12 +680,15 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
SDValue NewVal(M, i);
if (M->getNodeId() == Processed)
RemapValue(NewVal);
- DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal);
// OldVal may be a target of the ReplacedValues map which was marked
// NewNode to force reanalysis because it was updated. Ensure that
// anything that ReplacedValues mapped to OldVal will now be mapped
// all the way to NewVal.
- ReplacedValues[OldVal] = NewVal;
+ auto OldValId = getTableId(OldVal);
+ auto NewValId = getTableId(NewVal);
+ DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal);
+ if (OldValId != NewValId)
+ ReplacedValues[OldValId] = NewValId;
}
// The original node continues to exist in the DAG, marked NewNode.
}
@@ -773,9 +705,11 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
"Invalid type for promoted integer");
AnalyzeNewValue(Result);
- SDValue &OpEntry = PromotedIntegers[Op];
- assert(!OpEntry.getNode() && "Node is already promoted!");
- OpEntry = Result;
+ auto &OpIdEntry = PromotedIntegers[getTableId(Op)];
+ assert((OpIdEntry == 0) && "Node is already promoted!");
+ OpIdEntry = getTableId(Result);
+
+ DAG.transferDbgValues(Op, Result);
}
void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
@@ -788,15 +722,15 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
"Invalid type for softened float");
AnalyzeNewValue(Result);
- SDValue &OpEntry = SoftenedFloats[Op];
+ auto &OpIdEntry = SoftenedFloats[getTableId(Op)];
// Allow repeated calls to save f128 type nodes
// or any node with a type that transforms to itself.
// Many operations on these types are not softened.
- assert((!OpEntry.getNode()||
+ assert(((OpIdEntry == 0) ||
Op.getValueType() ==
- TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) &&
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) &&
"Node is already converted to integer!");
- OpEntry = Result;
+ OpIdEntry = getTableId(Result);
}
void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) {
@@ -805,9 +739,9 @@ void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) {
"Invalid type for promoted float");
AnalyzeNewValue(Result);
- SDValue &OpEntry = PromotedFloats[Op];
- assert(!OpEntry.getNode() && "Node is already promoted!");
- OpEntry = Result;
+ auto &OpIdEntry = PromotedFloats[getTableId(Op)];
+ assert((OpIdEntry == 0) && "Node is already promoted!");
+ OpIdEntry = getTableId(Result);
}
void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
@@ -818,19 +752,17 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
"Invalid type for scalarized vector");
AnalyzeNewValue(Result);
- SDValue &OpEntry = ScalarizedVectors[Op];
- assert(!OpEntry.getNode() && "Node is already scalarized!");
- OpEntry = Result;
+ auto &OpIdEntry = ScalarizedVectors[getTableId(Op)];
+ assert((OpIdEntry == 0) && "Node is already scalarized!");
+ OpIdEntry = getTableId(Result);
}
void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
SDValue &Hi) {
- std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
- RemapValue(Entry.first);
- RemapValue(Entry.second);
- assert(Entry.first.getNode() && "Operand isn't expanded");
- Lo = Entry.first;
- Hi = Entry.second;
+ std::pair<TableId, TableId> &Entry = ExpandedIntegers[getTableId(Op)];
+ assert((Entry.first != 0) && "Operand isn't expanded");
+ Lo = getSDValue(Entry.first);
+ Hi = getSDValue(Entry.second);
}
void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
@@ -856,20 +788,18 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
}
// Remember that this is the result of the node.
- std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
- assert(!Entry.first.getNode() && "Node already expanded");
- Entry.first = Lo;
- Entry.second = Hi;
+ std::pair<TableId, TableId> &Entry = ExpandedIntegers[getTableId(Op)];
+ assert((Entry.first == 0) && "Node already expanded");
+ Entry.first = getTableId(Lo);
+ Entry.second = getTableId(Hi);
}
void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
SDValue &Hi) {
- std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
- RemapValue(Entry.first);
- RemapValue(Entry.second);
- assert(Entry.first.getNode() && "Operand isn't expanded");
- Lo = Entry.first;
- Hi = Entry.second;
+ std::pair<TableId, TableId> &Entry = ExpandedFloats[getTableId(Op)];
+ assert((Entry.first != 0) && "Operand isn't expanded");
+ Lo = getSDValue(Entry.first);
+ Hi = getSDValue(Entry.second);
}
void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
@@ -882,21 +812,19 @@ void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
AnalyzeNewValue(Lo);
AnalyzeNewValue(Hi);
- // Remember that this is the result of the node.
- std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
- assert(!Entry.first.getNode() && "Node already expanded");
- Entry.first = Lo;
- Entry.second = Hi;
+ std::pair<TableId, TableId> &Entry = ExpandedFloats[getTableId(Op)];
+ assert((Entry.first == 0) && "Node already expanded");
+ Entry.first = getTableId(Lo);
+ Entry.second = getTableId(Hi);
}
void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
SDValue &Hi) {
- std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
- RemapValue(Entry.first);
- RemapValue(Entry.second);
- assert(Entry.first.getNode() && "Operand isn't split");
- Lo = Entry.first;
- Hi = Entry.second;
+ std::pair<TableId, TableId> &Entry = SplitVectors[getTableId(Op)];
+ Lo = getSDValue(Entry.first);
+ Hi = getSDValue(Entry.second);
+  assert(Lo.getNode() && "Operand isn't split");
}
void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
@@ -912,10 +840,10 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
AnalyzeNewValue(Hi);
// Remember that this is the result of the node.
- std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
- assert(!Entry.first.getNode() && "Node already split");
- Entry.first = Lo;
- Entry.second = Hi;
+ std::pair<TableId, TableId> &Entry = SplitVectors[getTableId(Op)];
+ assert((Entry.first == 0) && "Node already split");
+ Entry.first = getTableId(Lo);
+ Entry.second = getTableId(Hi);
}
void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
@@ -924,9 +852,9 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
"Invalid type for widened vector");
AnalyzeNewValue(Result);
- SDValue &OpEntry = WidenedVectors[Op];
- assert(!OpEntry.getNode() && "Node already widened!");
- OpEntry = Result;
+ auto &OpIdEntry = WidenedVectors[getTableId(Op)];
+ assert((OpIdEntry == 0) && "Node already widened!");
+ OpIdEntry = getTableId(Result);
}
@@ -1064,11 +992,11 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
LVT.getSizeInBits() + HVT.getSizeInBits());
+ EVT ShiftAmtVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout(), false);
Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);
Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi,
- DAG.getConstant(LVT.getSizeInBits(), dlHi,
- TLI.getPointerTy(DAG.getDataLayout())));
+ DAG.getConstant(LVT.getSizeInBits(), dlHi, ShiftAmtVT));
return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi);
}
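The JoinIntegers hunk above computes ZERO_EXTEND(Lo) OR'd with ANY_EXTEND(Hi) shifted left by Lo's bit width, now using a proper shift-amount type. The arithmetic, as a standalone C++ check (i32 halves joined into an i64):

#include <cassert>
#include <cstdint>

uint64_t joinIntegers(uint32_t Lo, uint32_t Hi) {
  // SHL amount = LVT bit width (32 here); zext Lo, shift Hi, then OR.
  return (uint64_t)Lo | ((uint64_t)Hi << 32);
}

int main() {
  assert(joinIntegers(0xDEADBEEFu, 0x01234567u) == 0x01234567DEADBEEFULL);
  return 0;
}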
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 64cb80e0d853..2c6b1ee7900f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -93,46 +93,81 @@ private:
N->getOpcode() == ISD::Register;
}
+  // Bijection from SDValue to unique id. As each created node gets a
+  // new id we do not need to worry about expunging reused entries. Should
+  // we run out of ids, we can do a one-time expensive compactification.
+ typedef unsigned TableId;
+
+ TableId NextValueId = 1;
+
+ SmallDenseMap<SDValue, TableId, 8> ValueToIdMap;
+ SmallDenseMap<TableId, SDValue, 8> IdToValueMap;
+
/// For integer nodes that are below legal width, this map indicates what
/// promoted value to use.
- SmallDenseMap<SDValue, SDValue, 8> PromotedIntegers;
+ SmallDenseMap<TableId, TableId, 8> PromotedIntegers;
/// For integer nodes that need to be expanded this map indicates which
/// operands are the expanded version of the input.
- SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedIntegers;
+ SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> ExpandedIntegers;
/// For floating-point nodes converted to integers of the same size, this map
/// indicates the converted value to use.
- SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats;
+ SmallDenseMap<TableId, TableId, 8> SoftenedFloats;
/// For floating-point nodes that have a smaller precision than the smallest
/// supported precision, this map indicates what promoted value to use.
- SmallDenseMap<SDValue, SDValue, 8> PromotedFloats;
+ SmallDenseMap<TableId, TableId, 8> PromotedFloats;
/// For float nodes that need to be expanded this map indicates which operands
/// are the expanded version of the input.
- SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats;
+ SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> ExpandedFloats;
/// For nodes that are <1 x ty>, this map indicates the scalar value of type
/// 'ty' to use.
- SmallDenseMap<SDValue, SDValue, 8> ScalarizedVectors;
+ SmallDenseMap<TableId, TableId, 8> ScalarizedVectors;
/// For nodes that need to be split this map indicates which operands are the
/// expanded version of the input.
- SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> SplitVectors;
+ SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> SplitVectors;
/// For vector nodes that need to be widened, indicates the widened value to
/// use.
- SmallDenseMap<SDValue, SDValue, 8> WidenedVectors;
+ SmallDenseMap<TableId, TableId, 8> WidenedVectors;
/// For values that have been replaced with another, indicates the replacement
/// value to use.
- SmallDenseMap<SDValue, SDValue, 8> ReplacedValues;
+ SmallDenseMap<TableId, TableId, 8> ReplacedValues;
/// This defines a worklist of nodes to process. In order to be pushed onto
/// this worklist, all operands of a node must have already been processed.
SmallVector<SDNode*, 128> Worklist;
+ TableId getTableId(SDValue V) {
+ assert(V.getNode() && "Getting TableId on SDValue()");
+
+ auto I = ValueToIdMap.find(V);
+ if (I != ValueToIdMap.end()) {
+      // Remap the id in case the value has since been replaced.
+ RemapId(I->second);
+ assert(I->second && "All Ids should be nonzero");
+ return I->second;
+ }
+ // Add if it's not there.
+ ValueToIdMap.insert(std::make_pair(V, NextValueId));
+ IdToValueMap.insert(std::make_pair(NextValueId, V));
+ ++NextValueId;
+ assert(NextValueId != 0 &&
+ "Ran out of Ids. Increase id type size or add compactification");
+ return NextValueId - 1;
+ }
+
+ const SDValue &getSDValue(TableId &Id) {
+ RemapId(Id);
+ assert(Id && "TableId should be non-zero");
+ return IdToValueMap[Id];
+ }
+
public:
explicit DAGTypeLegalizer(SelectionDAG &dag)
: TLI(dag.getTargetLoweringInfo()), DAG(dag),
@@ -147,10 +182,25 @@ public:
bool run();
void NoteDeletion(SDNode *Old, SDNode *New) {
- ExpungeNode(Old);
- ExpungeNode(New);
- for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
- ReplacedValues[SDValue(Old, i)] = SDValue(New, i);
+ for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
+ TableId NewId = getTableId(SDValue(New, i));
+ TableId OldId = getTableId(SDValue(Old, i));
+
+ if (OldId != NewId)
+ ReplacedValues[OldId] = NewId;
+
+ // Delete Node from tables.
+ ValueToIdMap.erase(SDValue(Old, i));
+ IdToValueMap.erase(OldId);
+ PromotedIntegers.erase(OldId);
+ ExpandedIntegers.erase(OldId);
+ SoftenedFloats.erase(OldId);
+ PromotedFloats.erase(OldId);
+ ExpandedFloats.erase(OldId);
+ ScalarizedVectors.erase(OldId);
+ SplitVectors.erase(OldId);
+ WidenedVectors.erase(OldId);
+ }
}
SelectionDAG &getDAG() const { return DAG; }
@@ -158,9 +208,9 @@ public:
private:
SDNode *AnalyzeNewNode(SDNode *N);
void AnalyzeNewValue(SDValue &Val);
- void ExpungeNode(SDNode *N);
void PerformExpensiveChecks();
- void RemapValue(SDValue &N);
+ void RemapId(TableId &Id);
+ void RemapValue(SDValue &V);
// Common routines.
SDValue BitConvertToInteger(SDValue Op);
@@ -207,8 +257,8 @@ private:
/// returns an i32, the lower 16 bits of which coincide with Op, and the upper
/// 16 bits of which contain rubbish.
SDValue GetPromotedInteger(SDValue Op) {
- SDValue &PromotedOp = PromotedIntegers[Op];
- RemapValue(PromotedOp);
+ TableId &PromotedId = PromotedIntegers[getTableId(Op)];
+ SDValue PromotedOp = getSDValue(PromotedId);
assert(PromotedOp.getNode() && "Operand wasn't promoted?");
return PromotedOp;
}
@@ -282,7 +332,7 @@ private:
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
// Integer Operand Promotion.
- bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);
+ bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);
SDValue PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N);
SDValue PromoteIntOp_BITCAST(SDNode *N);
@@ -373,11 +423,10 @@ private:
bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
// Integer Operand Expansion.
- bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo);
+ bool ExpandIntegerOperand(SDNode *N, unsigned OpNo);
SDValue ExpandIntOp_BR_CC(SDNode *N);
SDValue ExpandIntOp_SELECT_CC(SDNode *N);
SDValue ExpandIntOp_SETCC(SDNode *N);
- SDValue ExpandIntOp_SETCCE(SDNode *N);
SDValue ExpandIntOp_SETCCCARRY(SDNode *N);
SDValue ExpandIntOp_Shift(SDNode *N);
SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
@@ -403,16 +452,15 @@ private:
/// stay in a register, the Op is not converted to an integer.
/// In that case, the given op is returned.
SDValue GetSoftenedFloat(SDValue Op) {
- auto Iter = SoftenedFloats.find(Op);
+ TableId Id = getTableId(Op);
+ auto Iter = SoftenedFloats.find(Id);
if (Iter == SoftenedFloats.end()) {
assert(isSimpleLegalType(Op.getValueType()) &&
"Operand wasn't converted to integer?");
return Op;
}
-
- SDValue &SoftenedOp = Iter->second;
+ SDValue SoftenedOp = getSDValue(Iter->second);
assert(SoftenedOp.getNode() && "Unconverted op in SoftenedFloats?");
- RemapValue(SoftenedOp);
return SoftenedOp;
}
void SetSoftenedFloat(SDValue Op, SDValue Result);
@@ -531,7 +579,7 @@ private:
void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
// Float Operand Expansion.
- bool ExpandFloatOperand(SDNode *N, unsigned OperandNo);
+ bool ExpandFloatOperand(SDNode *N, unsigned OpNo);
SDValue ExpandFloatOp_BR_CC(SDNode *N);
SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N);
SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
@@ -549,8 +597,8 @@ private:
//===--------------------------------------------------------------------===//
SDValue GetPromotedFloat(SDValue Op) {
- SDValue &PromotedOp = PromotedFloats[Op];
- RemapValue(PromotedOp);
+ TableId &PromotedId = PromotedFloats[getTableId(Op)];
+ SDValue PromotedOp = getSDValue(PromotedId);
assert(PromotedOp.getNode() && "Operand wasn't promoted?");
return PromotedOp;
}
@@ -572,7 +620,7 @@ private:
SDValue PromoteFloatRes_UNDEF(SDNode *N);
SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N);
- bool PromoteFloatOperand(SDNode *N, unsigned ResNo);
+ bool PromoteFloatOperand(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo);
@@ -589,15 +637,15 @@ private:
/// element type, this returns the element. For example, if Op is a v1i32,
/// Op = < i32 val >, this method returns val, an i32.
SDValue GetScalarizedVector(SDValue Op) {
- SDValue &ScalarizedOp = ScalarizedVectors[Op];
- RemapValue(ScalarizedOp);
+ TableId &ScalarizedId = ScalarizedVectors[getTableId(Op)];
+ SDValue ScalarizedOp = getSDValue(ScalarizedId);
assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?");
return ScalarizedOp;
}
void SetScalarizedVector(SDValue Op, SDValue Result);
// Vector Result Scalarization: <1 x ty> -> ty.
- void ScalarizeVectorResult(SDNode *N, unsigned OpNo);
+ void ScalarizeVectorResult(SDNode *N, unsigned ResNo);
SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue ScalarizeVecRes_BinOp(SDNode *N);
SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
@@ -646,13 +694,14 @@ private:
void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
// Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
- void SplitVectorResult(SDNode *N, unsigned OpNo);
+ void SplitVectorResult(SDNode *N, unsigned ResNo);
void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -662,9 +711,9 @@ private:
void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_MGATHER(MaskedGatherSDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
@@ -684,7 +733,7 @@ private:
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
- SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, unsigned OpNo);
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
@@ -701,8 +750,8 @@ private:
/// method returns a v4i32 for which the first two elements are the same as
/// those of Op, while the last two elements contain rubbish.
SDValue GetWidenedVector(SDValue Op) {
- SDValue &WidenedOp = WidenedVectors[Op];
- RemapValue(WidenedOp);
+ TableId &WidenedId = WidenedVectors[getTableId(Op)];
+ SDValue WidenedOp = getSDValue(WidenedId);
assert(WidenedOp.getNode() && "Operand wasn't widened?");
return WidenedOp;
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 993465ae9dc2..df3134828af5 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -300,6 +300,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align);
Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0);
+ Chain = Hi.getValue(1);
// Handle endianness of the load.
if (TLI.hasBigEndianPartOrdering(OVT, DAG.getDataLayout()))
@@ -307,7 +308,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Modified the chain - switch anything that used the old chain to use
// the new one.
- ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+ ReplaceValueWith(SDValue(N, 1), Chain);
}
@@ -384,7 +385,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
// Build a vector of twice the length out of the expanded elements.
// For example <3 x i64> -> <6 x i32>.
- std::vector<SDValue> NewElts;
+ SmallVector<SDValue, 16> NewElts;
NewElts.reserve(NumElts*2);
for (unsigned i = 0; i < NumElts; ++i) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 7643790df350..67928d4bdbd5 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -32,7 +32,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -41,6 +40,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
@@ -63,7 +63,7 @@ class VectorLegalizer {
/// legalizing the same thing more than once.
SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
- /// \brief Adds a node to the translation cache.
+ /// Adds a node to the translation cache.
void AddLegalizedOperand(SDValue From, SDValue To) {
LegalizedNodes.insert(std::make_pair(From, To));
// If someone requests legalization of the new node, return itself.
@@ -71,55 +71,55 @@ class VectorLegalizer {
LegalizedNodes.insert(std::make_pair(To, To));
}
- /// \brief Legalizes the given node.
+ /// Legalizes the given node.
SDValue LegalizeOp(SDValue Op);
- /// \brief Assuming the node is legal, "legalize" the results.
+ /// Assuming the node is legal, "legalize" the results.
SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
- /// \brief Implements unrolling a VSETCC.
+ /// Implements unrolling a VSETCC.
SDValue UnrollVSETCC(SDValue Op);
- /// \brief Implement expand-based legalization of vector operations.
+ /// Implement expand-based legalization of vector operations.
///
/// This is just a high-level routine to dispatch to specific code paths for
/// operations to legalize them.
SDValue Expand(SDValue Op);
- /// \brief Implements expansion for FNEG; falls back to UnrollVectorOp if
+ /// Implements expansion for FNEG; falls back to UnrollVectorOp if
/// FSUB isn't legal.
///
/// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
/// SINT_TO_FLOAT and SHR on vectors aren't legal.
SDValue ExpandUINT_TO_FLOAT(SDValue Op);
- /// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
+ /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
SDValue ExpandSEXTINREG(SDValue Op);
- /// \brief Implement expansion for ANY_EXTEND_VECTOR_INREG.
+ /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
///
/// Shuffles the low lanes of the operand into place and bitcasts to the proper
/// type. The contents of the bits in the extended part of each element are
/// undef.
SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op);
- /// \brief Implement expansion for SIGN_EXTEND_VECTOR_INREG.
+ /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
///
/// Shuffles the low lanes of the operand into place, bitcasts to the proper
/// type, then shifts left and arithmetic shifts right to introduce a sign
/// extension.
SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op);
- /// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG.
+ /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
///
/// Shuffles the low lanes of the operand into place and blends zeros into
/// the remaining lanes, finally bitcasting to the proper type.
SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
- /// \brief Expand bswap of vectors into a shuffle if legal.
+ /// Expand bswap of vectors into a shuffle if legal.
SDValue ExpandBSWAP(SDValue Op);
- /// \brief Implement vselect in terms of XOR, AND, OR when blend is not
+ /// Implement vselect in terms of XOR, AND, OR when blend is not
/// supported by the target.
SDValue ExpandVSELECT(SDValue Op);
SDValue ExpandSELECT(SDValue Op);
@@ -130,29 +130,30 @@ class VectorLegalizer {
SDValue ExpandBITREVERSE(SDValue Op);
SDValue ExpandCTLZ(SDValue Op);
SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);
-
- /// \brief Implements vector promotion.
+ SDValue ExpandStrictFPOp(SDValue Op);
+
+ /// Implements vector promotion.
///
/// This is essentially just bitcasting the operands to a different type and
/// bitcasting the result back to the original type.
SDValue Promote(SDValue Op);
- /// \brief Implements [SU]INT_TO_FP vector promotion.
+ /// Implements [SU]INT_TO_FP vector promotion.
///
- /// This is a [zs]ext of the input operand to the next size up.
+ /// This is a [zs]ext of the input operand to a larger integer type.
SDValue PromoteINT_TO_FP(SDValue Op);
- /// \brief Implements FP_TO_[SU]INT vector promotion of the result type.
+ /// Implements FP_TO_[SU]INT vector promotion of the result type.
///
- /// It is promoted to the next size up integer type. The result is then
+ /// It is promoted to a larger integer type. The result is then
/// truncated back to the original type.
- SDValue PromoteFP_TO_INT(SDValue Op, bool isSigned);
+ SDValue PromoteFP_TO_INT(SDValue Op);
public:
VectorLegalizer(SelectionDAG& dag) :
DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
- /// \brief Begin legalizer the vector operations in the DAG.
+  /// Begin legalizing the vector operations in the DAG.
bool Run();
};
@@ -222,14 +223,16 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
for (const SDValue &Op : Node->op_values())
Ops.push_back(LegalizeOp(Op));
- SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0);
+ SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops),
+ Op.getResNo());
bool HasVectorValue = false;
if (Op.getOpcode() == ISD::LOAD) {
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
ISD::LoadExtType ExtType = LD->getExtensionType();
if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
- DEBUG(dbgs() << "\nLegalizing extending vector load: "; Node->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: ";
+ Node->dump(&DAG));
switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
LD->getMemoryVT())) {
default: llvm_unreachable("This action is not supported yet!");
@@ -261,8 +264,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
EVT StVT = ST->getMemoryVT();
MVT ValVT = ST->getValue().getSimpleValueType();
if (StVT.isVector() && ST->isTruncatingStore()) {
- DEBUG(dbgs() << "\nLegalizing truncating vector store: ";
- Node->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nLegalizing truncating vector store: ";
+ Node->dump(&DAG));
switch (TLI.getTruncStoreAction(ValVT, StVT)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
@@ -287,10 +290,34 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
if (!HasVectorValue)
return TranslateLegalizeResults(Op, Result);
- EVT QueryType;
+ TargetLowering::LegalizeAction Action = TargetLowering::Legal;
switch (Op.getOpcode()) {
default:
return TranslateLegalizeResults(Op, Result);
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ case ISD::STRICT_FPOW:
+ case ISD::STRICT_FPOWI:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FLOG2:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
+ // These pseudo-ops get legalized as if they were their non-strict
+ // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
+ // is also legal, but if ISD::FSQRT requires expansion then so does
+ // ISD::STRICT_FSQRT.
+ Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ break;
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
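A toy model of the strict-FP dispatch introduced above: each STRICT_* opcode is queried through its non-strict equivalent, so the two always share a legalize action. The enums and helpers here are illustrative stand-ins, not LLVM's:

#include <cassert>

enum Opcode { FADD, FSQRT, STRICT_FADD, STRICT_FSQRT };
enum Action { Legal, Expand };

// Map a strict pseudo-op to its non-strict equivalent.
Opcode nonStrictEquivalent(Opcode Op) {
  switch (Op) {
  case STRICT_FADD:  return FADD;
  case STRICT_FSQRT: return FSQRT;
  default:           return Op;
  }
}

// Pretend the target expands FSQRT but keeps FADD legal.
Action getOperationAction(Opcode Op) {
  return Op == FSQRT ? Expand : Legal;
}

// The strict op inherits whatever action its equivalent has.
Action getStrictFPOperationAction(Opcode Op) {
  return getOperationAction(nonStrictEquivalent(Op));
}

int main() {
  assert(getStrictFPOperationAction(STRICT_FSQRT) == Expand);
  assert(getStrictFPOperationAction(STRICT_FADD) == Legal);
  return 0;
}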
@@ -366,42 +393,47 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::UMAX:
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI:
- QueryType = Node->getValueType(0);
+ case ISD::FCANONICALIZE:
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
case ISD::FP_ROUND_INREG:
- QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ cast<VTSDNode>(Node->getOperand(1))->getVT());
break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
- QueryType = Node->getOperand(0).getValueType();
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(0).getValueType());
break;
case ISD::MSCATTER:
- QueryType = cast<MaskedScatterSDNode>(Node)->getValue().getValueType();
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ cast<MaskedScatterSDNode>(Node)->getValue().getValueType());
break;
case ISD::MSTORE:
- QueryType = cast<MaskedStoreSDNode>(Node)->getValue().getValueType();
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
break;
}
- DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
- switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
+ switch (Action) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Promote:
Result = Promote(Op);
Changed = true;
break;
case TargetLowering::Legal:
- DEBUG(dbgs() << "Legal node: nothing to do\n");
+ LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
break;
case TargetLowering::Custom: {
- DEBUG(dbgs() << "Trying custom legalization\n");
+ LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
- DEBUG(dbgs() << "Successfully custom legalized node\n");
+ LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
Result = Tmp1;
break;
}
- DEBUG(dbgs() << "Could not custom legalize node\n");
+ LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
LLVM_FALLTHROUGH;
}
case TargetLowering::Expand:
@@ -431,7 +463,7 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
// Promote the operation by extending the operand.
- return PromoteFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT);
+ return PromoteFP_TO_INT(Op);
}
// There are currently two cases of vector promotion:
@@ -472,20 +504,11 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
// INT_TO_FP operations may require the input operand be promoted even
// when the type is otherwise legal.
- EVT VT = Op.getOperand(0).getValueType();
- assert(Op.getNode()->getNumValues() == 1 &&
- "Can't promote a vector with multiple results!");
-
- // Normal getTypeToPromoteTo() doesn't work here, as that will promote
- // by widening the vector w/ the same element width and twice the number
- // of elements. We want the other way around, the same number of elements,
- // each twice the width.
- //
- // Increase the bitwidth of the element to the next pow-of-two
- // (which is greater than 8 bits).
+ MVT VT = Op.getOperand(0).getSimpleValueType();
+ MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+ assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
+ "Vectors have different number of elements!");
- EVT NVT = VT.widenIntegerVectorElementType(*DAG.getContext());
- assert(NVT.isSimple() && "Promoting to a non-simple vector type!");
SDLoc dl(Op);
SmallVector<SDValue, 4> Operands(Op.getNumOperands());
@@ -505,35 +528,28 @@ SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
// elements and then truncate the result. This is different from the default
// PromoteVector which uses bitcast to promote, thus assuming that the
// promoted vector type has the same overall size.
-SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
- assert(Op.getNode()->getNumValues() == 1 &&
- "Can't promote a vector with multiple results!");
- EVT VT = Op.getValueType();
+SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) {
+ MVT VT = Op.getSimpleValueType();
+ MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+ assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
+ "Vectors have different number of elements!");
- EVT NewVT = VT;
- unsigned NewOpc;
- while (true) {
- NewVT = NewVT.widenIntegerVectorElementType(*DAG.getContext());
- assert(NewVT.isSimple() && "Promoting to a non-simple vector type!");
- if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) {
- NewOpc = ISD::FP_TO_SINT;
- break;
- }
- if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewVT)) {
- NewOpc = ISD::FP_TO_UINT;
- break;
- }
- }
+ unsigned NewOpc = Op->getOpcode();
+ // Change FP_TO_UINT to FP_TO_SINT if possible.
+ // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
+ if (NewOpc == ISD::FP_TO_UINT &&
+ TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
+ NewOpc = ISD::FP_TO_SINT;
SDLoc dl(Op);
- SDValue Promoted = DAG.getNode(NewOpc, dl, NewVT, Op.getOperand(0));
+ SDValue Promoted = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0));
// Assert that the converted value fits in the original type. If it doesn't
// (eg: because the value being converted is too big), then the result of the
// original operation was undefined anyway, so the assert is still correct.
Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
: ISD::AssertSext,
- dl, NewVT, Promoted,
+ dl, NVT, Promoted,
DAG.getValueType(VT.getScalarType()));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
}
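The promoted FP_TO_INT sequence above (convert at the wider element type, AssertSext/AssertZext, then TRUNCATE) behaves like this scalar C++ sketch, with i32 standing in for the promoted type and i8 for the original:

#include <cassert>
#include <cstdint>

int8_t fpToInt8ViaPromote(float F) {
  int32_t Promoted = (int32_t)F; // FP_TO_SINT at the promoted width
  return (int8_t)Promoted;       // TRUNCATE back to the original type
}

int main() {
  // Values that fit survive the round trip; out-of-range inputs were
  // undefined for the original narrow conversion anyway.
  assert(fpToInt8ViaPromote(100.0f) == 100);
  assert(fpToInt8ViaPromote(-42.0f) == -42);
  return 0;
}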
@@ -665,9 +681,14 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals);
} else {
SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
-
- NewChain = Scalarized.getValue(1);
- Value = Scalarized.getValue(0);
+      // Skip past the MERGE_VALUES node if present.
+ if (Scalarized->getOpcode() == ISD::MERGE_VALUES) {
+ NewChain = Scalarized.getOperand(1);
+ Value = Scalarized.getOperand(0);
+ } else {
+ NewChain = Scalarized.getValue(1);
+ Value = Scalarized.getValue(0);
+ }
}
AddLegalizedOperand(Op.getValue(0), Value);
@@ -678,35 +699,6 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
SDValue VectorLegalizer::ExpandStore(SDValue Op) {
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
-
- EVT StVT = ST->getMemoryVT();
- EVT MemSclVT = StVT.getScalarType();
- unsigned ScalarSize = MemSclVT.getSizeInBits();
-
- // Round odd types to the next pow of two.
- if (!isPowerOf2_32(ScalarSize)) {
- // FIXME: This is completely broken and inconsistent with ExpandLoad
- // handling.
-
- // For sub-byte element sizes, this ends up with 0 stride between elements,
- // so the same element just gets re-written to the same location. There seem
-  // to be tests explicitly testing for this broken behavior though.
-
- LLVMContext &Ctx = *DAG.getContext();
-
- EVT NewMemVT
- = EVT::getVectorVT(Ctx,
- MemSclVT.getIntegerVT(Ctx, NextPowerOf2(ScalarSize)),
- StVT.getVectorNumElements());
-
- SDValue NewVectorStore = DAG.getTruncStore(
- ST->getChain(), SDLoc(Op), ST->getValue(), ST->getBasePtr(),
- ST->getPointerInfo(), NewMemVT, ST->getAlignment(),
- ST->getMemOperand()->getFlags(), ST->getAAInfo());
- ST = cast<StoreSDNode>(NewVectorStore.getNode());
- }
-
SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
AddLegalizedOperand(Op, TF);
return TF;
@@ -743,6 +735,24 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
return ExpandCTLZ(Op);
case ISD::CTTZ_ZERO_UNDEF:
return ExpandCTTZ_ZERO_UNDEF(Op);
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ case ISD::STRICT_FPOW:
+ case ISD::STRICT_FPOWI:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FLOG2:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
+ return ExpandStrictFPOp(Op);
default:
return DAG.UnrollVectorOp(Op.getNode());
}
@@ -1036,7 +1046,7 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
// Two to the power of half-word-size.
- SDValue TWOHW = DAG.getConstantFP(1 << (BW / 2), DL, Op.getValueType());
+ SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType());
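+  // Note: 1ULL avoids overflowing a 32-bit int when BW/2 is 31 or more.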
// Clear upper part of LO, lower HI
SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
@@ -1129,6 +1139,53 @@ SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) {
return DAG.UnrollVectorOp(Op.getNode());
}
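+// Expand a strict FP vector operation by unrolling it: extract each element,
+// emit the scalar strict op against the incoming chain, and merge the
+// per-element output chains back together with a TokenFactor.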
+SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned NumOpers = Op.getNumOperands();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT ValueVTs[] = {EltVT, MVT::Other};
+ SDValue Chain = Op.getOperand(0);
+ SDLoc dl(Op);
+
+ SmallVector<SDValue, 32> OpValues;
+ SmallVector<SDValue, 32> OpChains;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SmallVector<SDValue, 4> Opers;
+ SDValue Idx = DAG.getConstant(i, dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout()));
+
+ // The Chain is the first operand.
+ Opers.push_back(Chain);
+
+ // Now process the remaining operands.
+ for (unsigned j = 1; j < NumOpers; ++j) {
+ SDValue Oper = Op.getOperand(j);
+ EVT OperVT = Oper.getValueType();
+
+ if (OperVT.isVector())
+ Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ EltVT, Oper, Idx);
+
+ Opers.push_back(Oper);
+ }
+
+ SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers);
+
+ OpValues.push_back(ScalarOp.getValue(0));
+ OpChains.push_back(ScalarOp.getValue(1));
+ }
+
+ SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
+
+ AddLegalizedOperand(Op.getValue(0), Result);
+ AddLegalizedOperand(Op.getValue(1), NewChain);
+
+ return NewChain;
+}
+
SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
EVT VT = Op.getValueType();
unsigned NumElems = VT.getVectorNumElements();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index ce1c01b621f0..1cd43ace48f3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -33,9 +33,8 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
- DEBUG(dbgs() << "Scalarize node result " << ResNo << ": ";
- N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue R = SDValue();
switch (N->getOpcode()) {
@@ -169,9 +168,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
+ SDValue Op = N->getOperand(0);
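+  // If the source is a single-element vector of a type that is not simple and
+  // legal, scalarize it first so the bitcast is taken from the scalar value.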
+  if (Op.getValueType().isVector() &&
+      Op.getValueType().getVectorNumElements() == 1 &&
+      !isSimpleLegalType(Op.getValueType()))
+ Op = GetScalarizedVector(Op);
EVT NewVT = N->getValueType(0).getVectorElementType();
return DAG.getNode(ISD::BITCAST, SDLoc(N),
- NewVT, N->getOperand(0));
+ NewVT, Op);
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) {
@@ -338,8 +342,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
ScalarBool = TargetLowering::UndefinedBooleanContent;
}
+ EVT CondVT = Cond.getValueType();
if (ScalarBool != VecBool) {
- EVT CondVT = Cond.getValueType();
switch (ScalarBool) {
case TargetLowering::UndefinedBooleanContent:
break;
@@ -360,6 +364,11 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
}
}
+ // Truncate the condition if needed
+ auto BoolVT = getSetCCResultType(CondVT);
+ if (BoolVT.bitsLT(CondVT))
+ Cond = DAG.getNode(ISD::TRUNCATE, SDLoc(N), BoolVT, Cond);
+
return DAG.getSelect(SDLoc(N),
LHS.getValueType(), Cond, LHS,
GetScalarizedVector(N->getOperand(2)));
@@ -433,9 +442,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
- DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
- N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue Res = SDValue();
if (!Res.getNode()) {
@@ -515,7 +523,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
N->getValueType(0).getScalarType(), Elt);
// Revectorize the result so the types line up with what the uses of this
// expression expect.
- return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Op);
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op);
}
/// The vectors to concatenate have length one - use a BUILD_VECTOR instead.
@@ -618,9 +626,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
/// invalid operands or may have other results that need legalization, we just
/// know that (at least) one result needs vector splitting.
void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
- DEBUG(dbgs() << "Split node result: ";
- N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Split node result: "; N->dump(&DAG); dbgs() << "\n");
SDValue Lo, Hi;
// See if the target wants to custom expand this node.
@@ -749,6 +755,25 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMA:
SplitVecRes_TernaryOp(N, Lo, Hi);
break;
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ case ISD::STRICT_FPOW:
+ case ISD::STRICT_FPOWI:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FLOG2:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
+ SplitVecRes_StrictFPOp(N, Lo, Hi);
+ break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1028,6 +1053,56 @@ void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(Opcode, dl, OutHiVT, InHi);
}
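+// Split a strict FP vector operation: split every vector operand into Lo/Hi
+// halves, emit two half-width strict ops off the shared incoming chain, and
+// join their output chains with a TokenFactor.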
+void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ unsigned NumOps = N->getNumOperands();
+ SDValue Chain = N->getOperand(0);
+ EVT LoVT, HiVT;
+ SDLoc dl(N);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ SmallVector<SDValue, 4> OpsLo;
+ SmallVector<SDValue, 4> OpsHi;
+
+ // The Chain is the first operand.
+ OpsLo.push_back(Chain);
+ OpsHi.push_back(Chain);
+
+ // Now process the remaining operands.
+ for (unsigned i = 1; i < NumOps; ++i) {
+ SDValue Op = N->getOperand(i);
+ SDValue OpLo = Op;
+ SDValue OpHi = Op;
+
+ EVT InVT = Op.getValueType();
+ if (InVT.isVector()) {
+ // If the input also splits, handle it directly for a
+ // compile time speedup. Otherwise split it by hand.
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Op, OpLo, OpHi);
+ else
+ std::tie(OpLo, OpHi) = DAG.SplitVectorOperand(N, i);
+ }
+
+ OpsLo.push_back(OpLo);
+ OpsHi.push_back(OpHi);
+ }
+
+ EVT LoValueVTs[] = {LoVT, MVT::Other};
+ EVT HiValueVTs[] = {HiVT, MVT::Other};
+ Lo = DAG.getNode(N->getOpcode(), dl, LoValueVTs, OpsLo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiValueVTs, OpsHi);
+
+ // Build a factor node to remember that this Op is independent of the
+ // other one.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Lo.getValue(1), Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Vec = N->getOperand(0);
@@ -1200,16 +1275,16 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
MLD->isExpandingLoad());
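+  // The high half of the load starts LoMemVT.getStoreSize() bytes past the
+  // base pointer; give its memory operand pointer info carrying that offset.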
+ unsigned HiOffset = LoMemVT.getStoreSize();
- MMO = DAG.getMachineFunction().
- getMachineMemOperand(MLD->getPointerInfo(),
- MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
- SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MLD->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOLoad,
+ HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(),
+ MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
ExtType, MLD->isExpandingLoad());
-
// Build a factor node to remember that this load is independent of the
// other one.
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
@@ -1232,6 +1307,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
SDValue Mask = MGT->getMask();
SDValue Src0 = MGT->getValue();
SDValue Index = MGT->getIndex();
+ SDValue Scale = MGT->getScale();
unsigned Alignment = MGT->getOriginalAlignment();
// Split Mask operand
@@ -1263,11 +1339,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
- SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo};
+ SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale};
Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo,
MMO);
- SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi};
+ SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale};
Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi,
MMO);
@@ -1365,8 +1441,8 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT);
if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) &&
TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) {
- DEBUG(dbgs() << "Split vector extend via incremental extend:";
- N->dump(&DAG); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Split vector extend via incremental extend:";
+ N->dump(&DAG); dbgs() << "\n");
// Extend the source vector by one step.
SDValue NewSrc =
DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0));
@@ -1501,9 +1577,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
/// the node are known to be legal, but other operands of the node may need
/// legalization as well as the specified one.
bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
- DEBUG(dbgs() << "Split node operand: ";
- N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Split node operand: "; N->dump(&DAG); dbgs() << "\n");
SDValue Res = SDValue();
// See if the target wants to custom split this node.
@@ -1683,8 +1757,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) {
// Use the appropriate scalar instruction on the split subvectors before
// reducing the now partially reduced smaller vector.
- SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi);
- return DAG.getNode(N->getOpcode(), dl, ResVT, Partial);
+ SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi, N->getFlags());
+ return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, N->getFlags());
}
SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
@@ -1810,6 +1884,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
SDValue Ch = MGT->getChain();
SDValue Ptr = MGT->getBasePtr();
SDValue Index = MGT->getIndex();
+ SDValue Scale = MGT->getScale();
SDValue Mask = MGT->getMask();
SDValue Src0 = MGT->getValue();
unsigned Alignment = MGT->getOriginalAlignment();
@@ -1842,7 +1917,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
- SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo};
+ SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale};
SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl,
OpsLo, MMO);
@@ -1852,7 +1927,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
Alignment, MGT->getAAInfo(),
MGT->getRanges());
- SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi};
+ SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale};
SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl,
OpsHi, MMO);
@@ -1916,10 +1991,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
N->isCompressingStore());
- MMO = DAG.getMachineFunction().
- getMachineMemOperand(N->getPointerInfo(),
- MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
- SecondHalfAlignment, N->getAAInfo(), N->getRanges());
+ unsigned HiOffset = LoMemVT.getStoreSize();
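+  // (As with the split masked load, the high half's pointer info is offset
+  // by the store size of the low half.)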
+
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore,
+ HiMemVT.getStoreSize(), SecondHalfAlignment, N->getAAInfo(),
+ N->getRanges());
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
N->isTruncatingStore(), N->isCompressingStore());
@@ -1935,6 +2012,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
SDValue Ptr = N->getBasePtr();
SDValue Mask = N->getMask();
SDValue Index = N->getIndex();
+ SDValue Scale = N->getScale();
SDValue Data = N->getValue();
EVT MemoryVT = N->getMemoryVT();
unsigned Alignment = N->getOriginalAlignment();
@@ -1970,7 +2048,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
- SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo};
+ SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale};
Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
DL, OpsLo, MMO);
@@ -1982,7 +2060,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
// The order of the Scatter operation after split is well defined. The "Hi"
// part comes after the "Lo". So these two operations should be chained one
// after another.
- SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi};
+ SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale};
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
DL, OpsHi, MMO);
}
@@ -2005,6 +2083,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+ // Scalarize if the split halves are not byte-sized.
+ if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized())
+ return TLI.scalarizeVectorStore(N, DAG);
+
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
if (isTruncating)
@@ -2089,9 +2171,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
return SplitVecOp_UnaryOp(N);
SDLoc DL(N);
- // Extract the halves of the input via extract_subvector.
+ // Get the split input vector.
SDValue InLoVec, InHiVec;
- std::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL);
+ GetSplitVector(InVec, InLoVec, InHiVec);
// Truncate them to 1/2 the element size.
EVT HalfElementVT = IsFloat ?
EVT::getFloatingPointVT(InElementSize/2) :
@@ -2164,9 +2246,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
- DEBUG(dbgs() << "Widen node result " << ResNo << ": ";
- N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
// See if the target wants to custom widen this node.
if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
@@ -2948,6 +3029,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
SDValue Mask = N->getMask();
EVT MaskVT = Mask.getValueType();
SDValue Src0 = GetWidenedVector(N->getValue());
+ SDValue Scale = N->getScale();
unsigned NumElts = WideVT.getVectorNumElements();
SDLoc dl(N);
@@ -2963,7 +3045,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
Index.getValueType().getScalarType(),
NumElts);
Index = ModifyToType(Index, WideIndexVT);
- SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
+ SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index, Scale };
SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
N->getMemOperand());
@@ -3309,9 +3391,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
// Widen Vector Operand
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
- DEBUG(dbgs() << "Widen node operand " << OpNo << ": ";
- N->dump(&DAG);
- dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue Res = SDValue();
// See if the target wants to custom widen this node.
@@ -3374,11 +3455,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue InOp = N->getOperand(0);
- // If some legalization strategy other than widening is used on the operand,
- // we can't safely assume that just extending the low lanes is the correct
- // transformation.
- if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector)
- return WidenVecOp_Convert(N);
+ assert(getTypeAction(InOp.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unexpected type action");
InOp = GetWidenedVector(InOp);
assert(VT.getVectorNumElements() <
InOp.getValueType().getVectorNumElements() &&
@@ -3422,7 +3501,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
// low lanes.
switch (N->getOpcode()) {
default:
- llvm_unreachable("Extend legalization on on extend operation!");
+ llvm_unreachable("Extend legalization on extend operation!");
case ISD::ANY_EXTEND:
return DAG.getAnyExtendVectorInReg(InOp, DL, VT);
case ISD::SIGN_EXTEND:
@@ -3440,20 +3519,31 @@ SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
- // Since the result is legal and the input is illegal, it is unlikely that we
- // can fix the input to a legal type so unroll the convert into some scalar
- // code and create a nasty build vector.
+  // The result type is legal and the input type is illegal: widen the input
+  // and convert directly when the widened type is legal, otherwise unroll.
EVT VT = N->getValueType(0);
EVT EltVT = VT.getVectorElementType();
SDLoc dl(N);
unsigned NumElts = VT.getVectorNumElements();
SDValue InOp = N->getOperand(0);
- if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
- InOp = GetWidenedVector(InOp);
+ assert(getTypeAction(InOp.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unexpected type action");
+ InOp = GetWidenedVector(InOp);
EVT InVT = InOp.getValueType();
+ unsigned Opcode = N->getOpcode();
+
+ // See if a widened result type would be legal, if so widen the node.
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ InVT.getVectorNumElements());
+ if (TLI.isTypeLegal(WideVT)) {
+ SDValue Res = DAG.getNode(Opcode, dl, WideVT, InOp);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
+ DAG.getIntPtrConstant(0, dl));
+ }
+
EVT InEltVT = InVT.getVectorElementType();
- unsigned Opcode = N->getOpcode();
+ // Unroll the convert into some scalar code and create a nasty build vector.
SmallVector<SDValue, 16> Ops(NumElts);
for (unsigned i=0; i < NumElts; ++i)
Ops[i] = DAG.getNode(
@@ -3506,8 +3596,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
unsigned NumOperands = N->getNumOperands();
for (unsigned i=0; i < NumOperands; ++i) {
SDValue InOp = N->getOperand(i);
- if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
- InOp = GetWidenedVector(InOp);
+ assert(getTypeAction(InOp.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unexpected type action");
+ InOp = GetWidenedVector(InOp);
for (unsigned j=0; j < NumInElts; ++j)
Ops[Idx++] = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
@@ -3533,6 +3625,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
// vector type.
StoreSDNode *ST = cast<StoreSDNode>(N);
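+  // Widened stores require byte-sized element types; for sub-byte scalars
+  // fall back to scalarizing the store.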
+ if (!ST->getMemoryVT().getScalarType().isByteSized())
+ return TLI.scalarizeVectorStore(ST, DAG);
+
SmallVector<SDValue, 16> StChain;
if (ST->isTruncatingStore())
GenWidenVectorTruncStores(StChain, ST);
@@ -3576,6 +3671,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
SDValue DataOp = MSC->getValue();
SDValue Mask = MSC->getMask();
EVT MaskVT = Mask.getValueType();
+ SDValue Scale = MSC->getScale();
// Widen the value.
SDValue WideVal = GetWidenedVector(DataOp);
@@ -3595,7 +3691,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
NumElts);
Index = ModifyToType(Index, WideIndexVT);
- SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index};
+ SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index,
+ Scale};
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
MSC->getMemoryVT(), dl, Ops,
MSC->getMemOperand());
@@ -3605,6 +3702,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
SDValue InOp0 = GetWidenedVector(N->getOperand(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
SDLoc dl(N);
+ EVT VT = N->getValueType(0);
// WARNING: In this code we widen the compare instruction with garbage.
// This garbage may contain denormal floats which may be slow. Is this a real
@@ -3614,18 +3712,23 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
// Only some of the compared elements are legal.
EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
InOp0.getValueType());
+  // The result type is legal; if it is vXi1, keep vXi1 for the new SETCC.
+ if (VT.getScalarType() == MVT::i1)
+ SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ SVT.getVectorNumElements());
+
SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N),
- SVT, InOp0, InOp1, N->getOperand(2));
+ SVT, InOp0, InOp1, N->getOperand(2));
// Extract the needed results from the result vector.
EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
SVT.getVectorElementType(),
- N->getValueType(0).getVectorNumElements());
+ VT.getVectorNumElements());
SDValue CC = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- return PromoteTargetBoolean(CC, N->getValueType(0));
+ return PromoteTargetBoolean(CC, VT);
}
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index cf92907a8b5f..7e6b57426338 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -32,7 +32,8 @@ public:
enum DbgValueKind {
SDNODE = 0, ///< Value is the result of an expression.
CONST = 1, ///< Value is a constant.
- FRAMEIX = 2 ///< Value is contents of a stack location.
+ FRAMEIX = 2, ///< Value is contents of a stack location.
+ VREG = 3 ///< Value is a virtual register.
};
private:
union {
@@ -42,6 +43,7 @@ private:
} s;
const Value *Const; ///< Valid for constants.
unsigned FrameIx; ///< Valid for stack objects.
+ unsigned VReg; ///< Valid for registers.
} u;
DIVariable *Var;
DIExpression *Expr;
@@ -69,12 +71,18 @@ public:
u.Const = C;
}
- /// Constructor for frame indices.
- SDDbgValue(DIVariable *Var, DIExpression *Expr, unsigned FI, DebugLoc dl,
- unsigned O)
- : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(false) {
- kind = FRAMEIX;
- u.FrameIx = FI;
+ /// Constructor for virtual registers and frame indices.
+ SDDbgValue(DIVariable *Var, DIExpression *Expr, unsigned VRegOrFrameIdx,
+ bool IsIndirect, DebugLoc DL, unsigned Order,
+ enum DbgValueKind Kind)
+ : Var(Var), Expr(Expr), DL(DL), Order(Order), IsIndirect(IsIndirect) {
+ assert((Kind == VREG || Kind == FRAMEIX) &&
+ "Invalid SDDbgValue constructor");
+ kind = Kind;
+ if (kind == VREG)
+ u.VReg = VRegOrFrameIdx;
+ else
+ u.FrameIx = VRegOrFrameIdx;
}
/// Returns the kind.
@@ -98,6 +106,9 @@ public:
/// Returns the FrameIx for a stack object
unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; }
+ /// Returns the Virtual Register for a VReg
+ unsigned getVReg() const { assert (kind==VREG); return u.VReg; }
+
/// Returns whether this is an indirect value.
bool isIndirect() const { return IsIndirect; }
@@ -115,6 +126,28 @@ public:
bool isInvalidated() const { return Invalid; }
};
+/// Holds the information from a dbg_label node through SDISel.
+/// We do not use SDValue here to avoid including its header.
+class SDDbgLabel {
+ MDNode *Label;
+ DebugLoc DL;
+ unsigned Order;
+
+public:
+ SDDbgLabel(MDNode *Label, DebugLoc dl, unsigned O)
+ : Label(Label), DL(std::move(dl)), Order(O) {}
+
+ /// Returns the MDNode pointer for the label.
+ MDNode *getLabel() const { return Label; }
+
+ /// Returns the DebugLoc.
+ DebugLoc getDebugLoc() const { return DL; }
+
+ /// Returns the SDNodeOrder. This is the order of the preceding node in the
+ /// input.
+ unsigned getOrder() const { return Order; }
+};
+
} // end llvm namespace
#endif
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 698e14453d1d..3944d7df286d 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -13,6 +13,7 @@
#include "InstrEmitter.h"
#include "ScheduleDAGSDNodes.h"
+#include "SDNodeDbgValue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
@@ -115,7 +116,7 @@ private:
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGFast::Schedule() {
- DEBUG(dbgs() << "********** List Scheduling **********\n");
+ LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n");
NumLiveRegs = 0;
LiveRegDefs.resize(TRI->getNumRegs(), nullptr);
@@ -124,8 +125,8 @@ void ScheduleDAGFast::Schedule() {
// Build the scheduling graph.
BuildSchedGraph(nullptr);
- DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- SUnits[su].dumpAll(this));
+ LLVM_DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su]
+ .dumpAll(this));
// Execute the actual scheduling loop.
ListScheduleBottomUp();
@@ -180,8 +181,8 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
- DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
- DEBUG(SU->dump(this));
+ LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ LLVM_DEBUG(SU->dump(this));
assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
SU->setHeightToAtLeast(CurCycle);
@@ -236,7 +237,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
return nullptr;
- DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
+ LLVM_DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
assert(NewNodes.size() == 2 && "Expected a load folding node!");
N = NewNodes[1];
@@ -346,7 +347,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
SU = NewSU;
}
- DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
+ LLVM_DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
NewSU = Clone(SU);
// New SUnit has the exact same predecessors.
@@ -592,14 +593,14 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
// Issue copies, these can be expensive cross register class copies.
SmallVector<SUnit*, 2> Copies;
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
- DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
- << " to SU #" << Copies.front()->NodeNum << "\n");
+ LLVM_DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
NewDef = Copies.back();
}
- DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
- << " to SU #" << TrySU->NodeNum << "\n");
+ LLVM_DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
LiveRegDefs[Reg] = NewDef;
AddPred(NewDef, SDep(TrySU, SDep::Artificial));
TrySU->isAvailable = false;
@@ -666,8 +667,8 @@ void ScheduleDAGLinearize::ScheduleNode(SDNode *N) {
// These nodes do not need to be translated into MIs.
return;
- DEBUG(dbgs() << "\n*** Scheduling: ");
- DEBUG(N->dump(DAG));
+ LLVM_DEBUG(dbgs() << "\n*** Scheduling: ");
+ LLVM_DEBUG(N->dump(DAG));
Sequence.push_back(N);
unsigned NumOps = N->getNumOperands();
@@ -713,7 +714,7 @@ static SDNode *findGluedUser(SDNode *N) {
}
void ScheduleDAGLinearize::Schedule() {
- DEBUG(dbgs() << "********** DAG Linearization **********\n");
+ LLVM_DEBUG(dbgs() << "********** DAG Linearization **********\n");
SmallVector<SDNode*, 8> Glues;
unsigned DAGSize = 0;
@@ -763,19 +764,29 @@ ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
InstrEmitter Emitter(BB, InsertPos);
DenseMap<SDValue, unsigned> VRBaseMap;
- DEBUG({
- dbgs() << "\n*** Final schedule ***\n";
- });
+ LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; });
- // FIXME: Handle dbg_values.
unsigned NumNodes = Sequence.size();
+ MachineBasicBlock *BB = Emitter.getBlock();
for (unsigned i = 0; i != NumNodes; ++i) {
SDNode *N = Sequence[NumNodes-i-1];
- DEBUG(N->dump(DAG));
+ LLVM_DEBUG(N->dump(DAG));
Emitter.EmitNode(N, false, false, VRBaseMap);
+
+ // Emit any debug values associated with the node.
+ if (N->getHasDebugValue()) {
+ MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
+ for (auto DV : DAG->GetDbgValues(N)) {
+ if (DV->isInvalidated())
+ continue;
+ if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap))
+ BB->insert(InsertPos, DbgMI);
+ DV->setIsInvalidated();
+ }
+ }
}
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
InsertPos = Emitter.getInsertPos();
return Emitter.getBlock();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 49f304c8cc86..43e8ffd3839c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -26,7 +26,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
@@ -37,6 +36,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -46,6 +46,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -346,8 +347,8 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
- DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
- << " '" << BB->getName() << "' **********\n");
+ LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
+ << " '" << BB->getName() << "' **********\n");
CurCycle = 0;
IssueCount = 0;
@@ -364,8 +365,7 @@ void ScheduleDAGRRList::Schedule() {
// Build the scheduling graph.
BuildSchedGraph(nullptr);
- DEBUG(for (SUnit &SU : SUnits)
- SU.dumpAll(this));
+ LLVM_DEBUG(for (SUnit &SU : SUnits) SU.dumpAll(this));
Topo.InitDAGTopologicalSorting();
AvailableQueue->initNodes(SUnits);
@@ -377,11 +377,11 @@ void ScheduleDAGRRList::Schedule() {
AvailableQueue->releaseState();
- DEBUG({
- dbgs() << "*** Final schedule ***\n";
- dumpSchedule();
- dbgs() << '\n';
- });
+ LLVM_DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
}
//===----------------------------------------------------------------------===//
@@ -728,13 +728,13 @@ static void resetVRegCycle(SUnit *SU);
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
- DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
- DEBUG(SU->dump(this));
+ LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
+ LLVM_DEBUG(SU->dump(this));
#ifndef NDEBUG
if (CurCycle < SU->getHeight())
- DEBUG(dbgs() << " Height [" << SU->getHeight()
- << "] pipeline stall!\n");
+ LLVM_DEBUG(dbgs() << " Height [" << SU->getHeight()
+ << "] pipeline stall!\n");
#endif
// FIXME: Do not modify node height. It may interfere with
@@ -827,8 +827,8 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
/// its predecessor states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
- DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
- DEBUG(SU->dump(this));
+ LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
+ LLVM_DEBUG(SU->dump(this));
for (SDep &Pred : SU->Preds) {
CapturePred(&Pred);
@@ -1010,7 +1010,35 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
computeLatency(LoadSU);
}
- DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
+ bool isNewN = true;
+ SUnit *NewSU;
+ // This can only happen when isNewLoad is false.
+ if (N->getNodeId() != -1) {
+ NewSU = &SUnits[N->getNodeId()];
+ // If NewSU has already been scheduled, we need to clone it, but this
+ // negates the benefit to unfolding so just return SU.
+ if (NewSU->isScheduled)
+ return SU;
+ isNewN = false;
+ } else {
+ NewSU = CreateNewSUnit(N);
+ N->setNodeId(NewSU->NodeNum);
+
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ NewSU->isCommutable = true;
+
+ InitNumRegDefsLeft(NewSU);
+ computeLatency(NewSU);
+ }
+
+ LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
// Now that we are committed to unfolding replace DAG Uses.
for (unsigned i = 0; i != NumVals; ++i)
@@ -1018,23 +1046,6 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
SDValue(LoadNode, 1));
- SUnit *NewSU = CreateNewSUnit(N);
- assert(N->getNodeId() == -1 && "Node already inserted!");
- N->setNodeId(NewSU->NodeNum);
-
- const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
- for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
- if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
- NewSU->isTwoAddress = true;
- break;
- }
- }
- if (MCID.isCommutable())
- NewSU->isCommutable = true;
-
- InitNumRegDefsLeft(NewSU);
- computeLatency(NewSU);
-
// Record all the edges to and from the old SU, by category.
SmallVector<SDep, 4> ChainPreds;
SmallVector<SDep, 4> ChainSuccs;
@@ -1100,7 +1111,8 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
if (isNewLoad)
AvailableQueue->addNode(LoadSU);
- AvailableQueue->addNode(NewSU);
+ if (isNewN)
+ AvailableQueue->addNode(NewSU);
++NumUnfolds;
@@ -1117,22 +1129,36 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
if (!N)
return nullptr;
- if (SU->getNode()->getGluedNode())
+ LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
+ LLVM_DEBUG(SU->dump(this));
+
+ if (N->getGluedNode() &&
+ !TII->canCopyGluedNodeDuringSchedule(N)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Giving up because it has incoming glue and the target does not "
+ "want to copy it\n");
return nullptr;
+ }
SUnit *NewSU;
bool TryUnfold = false;
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
MVT VT = N->getSimpleValueType(i);
- if (VT == MVT::Glue)
+ if (VT == MVT::Glue) {
+ LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
return nullptr;
- else if (VT == MVT::Other)
+ } else if (VT == MVT::Other)
TryUnfold = true;
}
for (const SDValue &Op : N->op_values()) {
MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
- if (VT == MVT::Glue)
+ if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
+ LLVM_DEBUG(
+          dbgs() << "Giving up because one of the operands is glue and "
+ "the target does not want to copy it\n");
return nullptr;
+ }
}
// If possible unfold instruction.
@@ -1147,7 +1173,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
return SU;
}
- DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
+ LLVM_DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
NewSU = CreateClone(SU);
// New SUnit has the exact same predecessors.
@@ -1408,7 +1434,7 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
// Furthermore, it may have been made available again, in which case it is
// now already in the AvailableQueue.
if (SU->isAvailable && !SU->NodeQueueId) {
- DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
+ LLVM_DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
AvailableQueue->push(SU);
}
if (i < Interferences.size())
@@ -1429,12 +1455,10 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
SmallVector<unsigned, 4> LRegs;
if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
break;
- DEBUG(dbgs() << " Interfering reg ";
- if (LRegs[0] == TRI->getNumRegs())
- dbgs() << "CallResource";
- else
- dbgs() << printReg(LRegs[0], TRI);
- dbgs() << " SU #" << CurSU->NodeNum << '\n');
+ LLVM_DEBUG(dbgs() << " Interfering reg ";
+ if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
+ else dbgs() << printReg(LRegs[0], TRI);
+ dbgs() << " SU #" << CurSU->NodeNum << '\n');
std::pair<LRegsMapT::iterator, bool> LRegsPair =
LRegsMap.insert(std::make_pair(CurSU, LRegs));
if (LRegsPair.second) {
@@ -1480,17 +1504,17 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
if (!BtSU->isPending)
AvailableQueue->remove(BtSU);
}
- DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU("
- << TrySU->NodeNum << ")\n");
+ LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
+ << ") to SU(" << TrySU->NodeNum << ")\n");
AddPred(TrySU, SDep(BtSU, SDep::Artificial));
// If one or more successors has been unscheduled, then the current
// node is no longer available.
if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
- DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
+ LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
CurSU = AvailableQueue->pop();
} else {
- DEBUG(dbgs() << "TrySU available\n");
+ LLVM_DEBUG(dbgs() << "TrySU available\n");
// Available and in AvailableQueue
AvailableQueue->remove(TrySU);
CurSU = TrySU;
@@ -1534,14 +1558,14 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
// Issue copies, these can be expensive cross register class copies.
SmallVector<SUnit*, 2> Copies;
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
- DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
- << " to SU #" << Copies.front()->NodeNum << "\n");
+ LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
NewDef = Copies.back();
}
- DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
- << " to SU #" << TrySU->NodeNum << "\n");
+ LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
LiveRegDefs[Reg] = NewDef;
AddPred(NewDef, SDep(TrySU, SDep::Artificial));
TrySU->isAvailable = false;
@@ -1569,8 +1593,8 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
// priority. If it is not ready put it back. Schedule the node.
Sequence.reserve(SUnits.size());
while (!AvailableQueue->empty() || !Interferences.empty()) {
- DEBUG(dbgs() << "\nExamining Available:\n";
- AvailableQueue->dump(this));
+ LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
+ AvailableQueue->dump(this));
// Pick the best node to schedule taking all constraints into
// consideration.
@@ -2033,8 +2057,8 @@ LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
unsigned Id = RC->getID();
unsigned RP = RegPressure[Id];
if (!RP) continue;
- DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
- << RegLimit[Id] << '\n');
+ LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
+ << RegLimit[Id] << '\n');
}
}
#endif
@@ -2186,14 +2210,15 @@ void RegReductionPQBase::scheduledNode(SUnit *SU) {
if (RegPressure[RCId] < Cost) {
// Register pressure tracking is imprecise. This can happen. But we try
// hard not to let it happen because it likely results in poor scheduling.
- DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n");
+ LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum
+ << ") has too many regdefs\n");
RegPressure[RCId] = 0;
}
else {
RegPressure[RCId] -= Cost;
}
}
- DEBUG(dumpRegPressure());
+ LLVM_DEBUG(dumpRegPressure());
}
void RegReductionPQBase::unscheduledNode(SUnit *SU) {
@@ -2273,7 +2298,7 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) {
}
}
- DEBUG(dumpRegPressure());
+ LLVM_DEBUG(dumpRegPressure());
}
//===----------------------------------------------------------------------===//
@@ -2368,7 +2393,7 @@ static void initVRegCycle(SUnit *SU) {
if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
return;
- DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
+ LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
SU->isVRegCycle = true;
@@ -2406,7 +2431,7 @@ static bool hasVRegCycleUse(const SUnit *SU) {
if (Pred.isCtrl()) continue; // ignore chain preds
if (Pred.getSUnit()->isVRegCycle &&
Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
- DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
+ LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
return true;
}
}
@@ -2466,9 +2491,9 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
int LDepth = left->getDepth() - LPenalty;
int RDepth = right->getDepth() - RPenalty;
if (LDepth != RDepth) {
- DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
- << ") depth " << LDepth << " vs SU (" << right->NodeNum
- << ") depth " << RDepth << "\n");
+ LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
+ << ") depth " << LDepth << " vs SU (" << right->NodeNum
+ << ") depth " << RDepth << "\n");
return LDepth < RDepth ? 1 : -1;
}
if (left->Latency != right->Latency)
@@ -2490,9 +2515,9 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
static const char *const PhysRegMsg[] = { " has no physreg",
" defines a physreg" };
#endif
- DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
- << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
- << PhysRegMsg[RHasPhysReg] << "\n");
+ LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
+ << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
+ << ") " << PhysRegMsg[RHasPhysReg] << "\n");
return LHasPhysReg < RHasPhysReg;
}
}
@@ -2636,13 +2661,13 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
// Avoid causing spills. If register pressure is high, schedule for
// register pressure reduction.
if (LHigh && !RHigh) {
- DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
- << right->NodeNum << ")\n");
+ LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
+ << right->NodeNum << ")\n");
return true;
}
else if (!LHigh && RHigh) {
- DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
- << left->NodeNum << ")\n");
+ LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
+ << left->NodeNum << ")\n");
return false;
}
if (!LHigh && !RHigh) {
@@ -2704,8 +2729,9 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
}
if (!DisableSchedRegPressure && LPDiff != RPDiff) {
- DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
- << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
+ LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
+ << "): " << LPDiff << " != SU(" << right->NodeNum
+ << "): " << RPDiff << "\n");
return LPDiff > RPDiff;
}
@@ -2717,8 +2743,9 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
}
if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
- DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
- << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
+ LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
+ << " != SU(" << right->NodeNum << "): " << RLiveUses
+ << "\n");
return LLiveUses < RLiveUses;
}
@@ -2732,9 +2759,9 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
if (!DisableSchedCriticalPath) {
int spread = (int)left->getDepth() - (int)right->getDepth();
if (std::abs(spread) > MaxReorderWindow) {
- DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
- << left->getDepth() << " != SU(" << right->NodeNum << "): "
- << right->getDepth() << "\n");
+ LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
+ << left->getDepth() << " != SU(" << right->NodeNum
+ << "): " << right->getDepth() << "\n");
return left->getDepth() < right->getDepth();
}
}
@@ -2955,9 +2982,10 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
// Ok, the transformation is safe and the heuristics suggest it is
// profitable. Update the graph.
- DEBUG(dbgs() << " Prescheduling SU #" << SU.NodeNum
- << " next to PredSU #" << PredSU->NodeNum
- << " to guide scheduling in the presence of multiple uses\n");
+ LLVM_DEBUG(
+ dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
+ << PredSU->NodeNum
+ << " to guide scheduling in the presence of multiple uses\n");
for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
SDep Edge = PredSU->Succs[i];
assert(!Edge.isAssignedRegDep());
@@ -3046,8 +3074,9 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
(isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
(!SU.isCommutable && SuccSU->isCommutable)) &&
!scheduleDAG->IsReachable(SuccSU, &SU)) {
- DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
- << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
+ LLVM_DEBUG(dbgs()
+ << " Adding a pseudo-two-addr edge from SU #"
+ << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
}
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index c09b47af26a6..430d8fb34476 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -243,7 +244,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
return;
// Sort them in increasing order.
- std::sort(Offsets.begin(), Offsets.end());
+ llvm::sort(Offsets.begin(), Offsets.end());
// Check if the loads are close enough.
SmallVector<SDNode*, 4> Loads;
@@ -910,6 +911,39 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
MachineBasicBlock *InsertBB = Emitter.getBlock();
MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());
+
+ SDDbgInfo::DbgLabelIterator DLI = DAG->DbgLabelBegin();
+ SDDbgInfo::DbgLabelIterator DLE = DAG->DbgLabelEnd();
+ // Now emit the rest according to source order.
+ LastOrder = 0;
+ for (const auto &InstrOrder : Orders) {
+ unsigned Order = InstrOrder.first;
+ MachineInstr *MI = InstrOrder.second;
+ if (!MI)
+ continue;
+
+      // Insert all SDDbgLabels whose order(s) are before "Order".
+ for (; DLI != DLE &&
+ (*DLI)->getOrder() >= LastOrder && (*DLI)->getOrder() < Order;
+ ++DLI) {
+ MachineInstr *DbgMI = Emitter.EmitDbgLabel(*DLI);
+ if (DbgMI) {
+ if (!LastOrder)
+ // Insert to start of the BB (after PHIs).
+ BB->insert(BBBegin, DbgMI);
+ else {
+ // Insert at the instruction, which may be in a different
+ // block, if the block was split by a custom inserter.
+ MachineBasicBlock::iterator Pos = MI;
+ MI->getParent()->insert(Pos, DbgMI);
+ }
+ }
+ }
+ if (DLI == DLE)
+ break;
+
+ LastOrder = Order;
+ }
}
InsertPos = Emitter.getInsertPos();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index a058942c5689..6417e16bd0fd 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -17,10 +17,10 @@
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/MachineValueType.h"
#include <cassert>
#include <string>
#include <vector>
@@ -88,7 +88,7 @@ class InstrItineraryData;
/// Clone - Creates a clone of the specified SUnit. It does not copy the
/// predecessors / successors info nor the temporary scheduling states.
///
- SUnit *Clone(SUnit *N);
+ SUnit *Clone(SUnit *Old);
/// BuildSchedGraph - Build the SUnit graph from the selection dag that we
/// are input. This SUnit graph is similar to the SelectionDAG, but
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 07b46b9183ab..84055f8ecc1a 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -93,8 +93,8 @@ private:
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGVLIW::Schedule() {
- DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
- << " '" << BB->getName() << "' **********\n");
+ LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
+ << " '" << BB->getName() << "' **********\n");
// Build the scheduling graph.
BuildSchedGraph(AA);
@@ -151,8 +151,8 @@ void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) {
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
- DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
- DEBUG(SU->dump(this));
+ LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ LLVM_DEBUG(SU->dump(this));
Sequence.push_back(SU);
assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
@@ -246,7 +246,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() {
} else if (!HasNoopHazards) {
// Otherwise, we have a pipeline stall, but no other problem, just advance
// the current cycle and try again.
- DEBUG(dbgs() << "*** Advancing cycle, no work to do\n");
+ LLVM_DEBUG(dbgs() << "*** Advancing cycle, no work to do\n");
HazardRec->AdvanceCycle();
++NumStalls;
++CurCycle;
@@ -254,7 +254,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() {
// Otherwise, we have no instructions to issue and we have instructions
// that will fault if we don't do this right. This is the case for
// processors without pipeline interlocks and other cases.
- DEBUG(dbgs() << "*** Emitting noop\n");
+ LLVM_DEBUG(dbgs() << "*** Emitting noop\n");
HazardRec->EmitNoop();
Sequence.push_back(nullptr); // NULL here means noop
++NumNoops;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4c8b63d2f239..48e03c6da68f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -32,7 +32,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -58,6 +57,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Mutex.h"
@@ -89,11 +89,16 @@ void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
#define DEBUG_TYPE "selectiondag"
+static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt",
+ cl::Hidden, cl::init(true),
+ cl::desc("Gang up loads and stores generated by inlining of memcpy"));
+
+static cl::opt<int> MaxLdStGlue("ldstmemcpy-glue-max",
+ cl::desc("Number limit for gluing ld/st of memcpy."),
+ cl::Hidden, cl::init(0));
+
static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) {
- DEBUG(
- dbgs() << Msg;
- V.getNode()->dump(G);
- );
+ LLVM_DEBUG(dbgs() << Msg; V.getNode()->dump(G););
}
//===----------------------------------------------------------------------===//
@@ -263,6 +268,52 @@ bool ISD::allOperandsUndef(const SDNode *N) {
return true;
}
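+// Returns true if Op is a single ConstantSDNode satisfying Match, or a
+// BUILD_VECTOR whose operands are all ConstantSDNodes of the scalar type,
+// each satisfying Match.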
+bool ISD::matchUnaryPredicate(SDValue Op,
+ std::function<bool(ConstantSDNode *)> Match) {
+ if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
+ return Match(Cst);
+
+ if (ISD::BUILD_VECTOR != Op.getOpcode())
+ return false;
+
+ EVT SVT = Op.getValueType().getScalarType();
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
+ if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
+ return false;
+ }
+ return true;
+}
+
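+// The binary analogue: applies Match pairwise, either to two ConstantSDNodes
+// or element-by-element to two BUILD_VECTORs of constants with equal types.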
+bool ISD::matchBinaryPredicate(
+ SDValue LHS, SDValue RHS,
+ std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) {
+ if (LHS.getValueType() != RHS.getValueType())
+ return false;
+
+ if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
+ if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
+ return Match(LHSCst, RHSCst);
+
+ if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
+ ISD::BUILD_VECTOR != RHS.getOpcode())
+ return false;
+
+ EVT SVT = LHS.getValueType().getScalarType();
+ for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+ auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i));
+ auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
+ if (!LHSCst || !RHSCst)
+ return false;
+ if (LHSCst->getValueType(0) != SVT ||
+ LHSCst->getValueType(0) != RHSCst->getValueType(0))
+ return false;
+ if (!Match(LHSCst, RHSCst))
+ return false;
+ }
+ return true;
+}
+
ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
switch (ExtType) {
case ISD::EXTLOAD:
@@ -487,12 +538,41 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
break;
}
+ case ISD::MLOAD: {
+ const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
+ ID.AddInteger(MLD->getMemoryVT().getRawBits());
+ ID.AddInteger(MLD->getRawSubclassData());
+ ID.AddInteger(MLD->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::MSTORE: {
+ const MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
+ ID.AddInteger(MST->getMemoryVT().getRawBits());
+ ID.AddInteger(MST->getRawSubclassData());
+ ID.AddInteger(MST->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::MGATHER: {
+ const MaskedGatherSDNode *MG = cast<MaskedGatherSDNode>(N);
+ ID.AddInteger(MG->getMemoryVT().getRawBits());
+ ID.AddInteger(MG->getRawSubclassData());
+ ID.AddInteger(MG->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::MSCATTER: {
+ const MaskedScatterSDNode *MS = cast<MaskedScatterSDNode>(N);
+ ID.AddInteger(MS->getMemoryVT().getRawBits());
+ ID.AddInteger(MS->getRawSubclassData());
+ ID.AddInteger(MS->getPointerInfo().getAddrSpace());
+ break;
+ }
case ISD::ATOMIC_CMP_SWAP:
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_CLR:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_NAND:
@@ -726,7 +806,7 @@ static void VerifySDNode(SDNode *N) {
}
#endif // NDEBUG
-/// \brief Insert a newly allocated node into the DAG.
+/// Insert a newly allocated node into the DAG.
///
/// Handles insertion into the all nodes list and CSE map, as well as
/// verification and other common operations when a new node is allocated.
@@ -903,13 +983,16 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
void SelectionDAG::init(MachineFunction &NewMF,
OptimizationRemarkEmitter &NewORE,
- Pass *PassPtr) {
+ Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
+ DivergenceAnalysis * Divergence) {
MF = &NewMF;
SDAGISelPass = PassPtr;
ORE = &NewORE;
TLI = getSubtarget().getTargetLowering();
TSI = getSubtarget().getSelectionDAGInfo();
+ LibInfo = LibraryInfo;
Context = &MF->getFunction().getContext();
+ DA = Divergence;
}
SelectionDAG::~SelectionDAG() {
@@ -1077,21 +1160,25 @@ SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
}
SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) {
- EVT EltVT = VT.getScalarType();
- SDValue TrueValue;
- switch (TLI->getBooleanContents(VT)) {
- case TargetLowering::ZeroOrOneBooleanContent:
- case TargetLowering::UndefinedBooleanContent:
- TrueValue = getConstant(1, DL, VT);
- break;
- case TargetLowering::ZeroOrNegativeOneBooleanContent:
- TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL,
- VT);
- break;
- }
+ SDValue TrueValue = getBoolConstant(true, DL, VT, VT);
return getNode(ISD::XOR, DL, VT, Val, TrueValue);
}
+SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT,
+ EVT OpVT) {
+ if (!V)
+ return getConstant(0, DL, VT);
+
+ switch (TLI->getBooleanContents(OpVT)) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ case TargetLowering::UndefinedBooleanContent:
+ return getConstant(1, DL, VT);
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return getAllOnesConstant(DL, VT);
+ }
+ llvm_unreachable("Unexpected boolean content enum!");
+}
+
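getBoolConstant centralizes the boolean-contents handling that several folds below previously open-coded: false is always 0, while the encoding of true depends on how the target represents booleans of OpVT (1 for zero-or-one or undefined contents, all-ones otherwise). A hypothetical integer-only caller, assuming N1, N2, Cond, DL, VT and OpVT are in scope:

// Fold an integer setcc of a value against itself to a properly encoded true.
if (N1 == N2 && Cond == ISD::SETEQ && !OpVT.isFloatingPoint())
  return getBoolConstant(true, DL, VT, OpVT);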
SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT,
bool isT, bool isO) {
EVT EltVT = VT.getScalarType();
@@ -1184,7 +1271,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
return SDValue(N, 0);
if (!N) {
- N = newSDNode<ConstantSDNode>(isT, isO, Elt, DL.getDebugLoc(), EltVT);
+ N = newSDNode<ConstantSDNode>(isT, isO, Elt, EltVT);
CSEMap.InsertNode(N, IP);
InsertNode(N);
NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this);
@@ -1227,7 +1314,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,
return SDValue(N, 0);
if (!N) {
- N = newSDNode<ConstantFPSDNode>(isTarget, &V, DL.getDebugLoc(), EltVT);
+ N = newSDNode<ConstantFPSDNode>(isTarget, &V, EltVT);
CSEMap.InsertNode(N, IP);
InsertNode(N);
}
@@ -1503,33 +1590,35 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
if (N1.isUndef())
commuteShuffle(N1, N2, MaskVec);
- // If shuffling a splat, try to blend the splat instead. We do this here so
- // that even when this arises during lowering we don't have to re-handle it.
- auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) {
- BitVector UndefElements;
- SDValue Splat = BV->getSplatValue(&UndefElements);
- if (!Splat)
- return;
+ if (TLI->hasVectorBlend()) {
+ // If shuffling a splat, try to blend the splat instead. We do this here so
+ // that even when this arises during lowering we don't have to re-handle it.
+ auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) {
+ BitVector UndefElements;
+ SDValue Splat = BV->getSplatValue(&UndefElements);
+ if (!Splat)
+ return;
- for (int i = 0; i < NElts; ++i) {
- if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts))
- continue;
+ for (int i = 0; i < NElts; ++i) {
+ if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts))
+ continue;
- // If this input comes from undef, mark it as such.
- if (UndefElements[MaskVec[i] - Offset]) {
- MaskVec[i] = -1;
- continue;
- }
+ // If this input comes from undef, mark it as such.
+ if (UndefElements[MaskVec[i] - Offset]) {
+ MaskVec[i] = -1;
+ continue;
+ }
- // If we can blend a non-undef lane, use that instead.
- if (!UndefElements[i])
- MaskVec[i] = i + Offset;
- }
- };
- if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
- BlendSplat(N1BV, 0);
- if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
- BlendSplat(N2BV, NElts);
+ // If we can blend a non-undef lane, use that instead.
+ if (!UndefElements[i])
+ MaskVec[i] = i + Offset;
+ }
+ };
+ if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
+ BlendSplat(N1BV, 0);
+ if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
+ BlendSplat(N2BV, NElts);
+ }
// Canonicalize all index into lhs, -> shuffle lhs, undef
// Canonicalize all index into rhs, -> shuffle rhs, undef
@@ -1643,7 +1732,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
}
SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
- MVT VT = SV.getSimpleValueType(0);
+ EVT VT = SV.getValueType(0);
SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end());
ShuffleVectorSDNode::commuteMask(MaskVec);
@@ -1661,6 +1750,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
return SDValue(E, 0);
auto *N = newSDNode<RegisterSDNode>(RegNo, VT);
+ N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1870,19 +1960,15 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
ISD::CondCode Cond, const SDLoc &dl) {
+ EVT OpVT = N1.getValueType();
+
// These setcc operations always fold.
switch (Cond) {
default: break;
case ISD::SETFALSE:
- case ISD::SETFALSE2: return getConstant(0, dl, VT);
+ case ISD::SETFALSE2: return getBoolConstant(false, dl, VT, OpVT);
case ISD::SETTRUE:
- case ISD::SETTRUE2: {
- TargetLowering::BooleanContent Cnt =
- TLI->getBooleanContents(N1->getValueType(0));
- return getConstant(
- Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl,
- VT);
- }
+ case ISD::SETTRUE2: return getBoolConstant(true, dl, VT, OpVT);
case ISD::SETOEQ:
case ISD::SETOGT:
@@ -1905,16 +1991,16 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
switch (Cond) {
default: llvm_unreachable("Unknown integer setcc!");
- case ISD::SETEQ: return getConstant(C1 == C2, dl, VT);
- case ISD::SETNE: return getConstant(C1 != C2, dl, VT);
- case ISD::SETULT: return getConstant(C1.ult(C2), dl, VT);
- case ISD::SETUGT: return getConstant(C1.ugt(C2), dl, VT);
- case ISD::SETULE: return getConstant(C1.ule(C2), dl, VT);
- case ISD::SETUGE: return getConstant(C1.uge(C2), dl, VT);
- case ISD::SETLT: return getConstant(C1.slt(C2), dl, VT);
- case ISD::SETGT: return getConstant(C1.sgt(C2), dl, VT);
- case ISD::SETLE: return getConstant(C1.sle(C2), dl, VT);
- case ISD::SETGE: return getConstant(C1.sge(C2), dl, VT);
+ case ISD::SETEQ: return getBoolConstant(C1 == C2, dl, VT, OpVT);
+ case ISD::SETNE: return getBoolConstant(C1 != C2, dl, VT, OpVT);
+ case ISD::SETULT: return getBoolConstant(C1.ult(C2), dl, VT, OpVT);
+ case ISD::SETUGT: return getBoolConstant(C1.ugt(C2), dl, VT, OpVT);
+ case ISD::SETULE: return getBoolConstant(C1.ule(C2), dl, VT, OpVT);
+ case ISD::SETUGE: return getBoolConstant(C1.uge(C2), dl, VT, OpVT);
+ case ISD::SETLT: return getBoolConstant(C1.slt(C2), dl, VT, OpVT);
+ case ISD::SETGT: return getBoolConstant(C1.sgt(C2), dl, VT, OpVT);
+ case ISD::SETLE: return getBoolConstant(C1.sle(C2), dl, VT, OpVT);
+ case ISD::SETGE: return getBoolConstant(C1.sge(C2), dl, VT, OpVT);
}
}
}
@@ -1926,41 +2012,54 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
case ISD::SETEQ: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
- case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT);
+ case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT,
+ OpVT);
case ISD::SETNE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
- case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan ||
- R==APFloat::cmpLessThan, dl, VT);
+ case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpLessThan, dl, VT,
+ OpVT);
case ISD::SETLT: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
- case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT);
+ case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT,
+ OpVT);
case ISD::SETGT: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
- case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT);
+ case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl,
+ VT, OpVT);
case ISD::SETLE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
- case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan ||
- R==APFloat::cmpEqual, dl, VT);
+ case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan ||
+ R==APFloat::cmpEqual, dl, VT,
+ OpVT);
case ISD::SETGE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
LLVM_FALLTHROUGH;
- case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan ||
- R==APFloat::cmpEqual, dl, VT);
- case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, dl, VT);
- case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, dl, VT);
- case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered ||
- R==APFloat::cmpEqual, dl, VT);
- case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, dl, VT);
- case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered ||
- R==APFloat::cmpLessThan, dl, VT);
- case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan ||
- R==APFloat::cmpUnordered, dl, VT);
- case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, dl, VT);
- case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, dl, VT);
+ case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpEqual, dl, VT, OpVT);
+ case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT,
+ OpVT);
+ case ISD::SETUO: return getBoolConstant(R==APFloat::cmpUnordered, dl, VT,
+ OpVT);
+ case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpLessThan, dl, VT,
+ OpVT);
+ case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpUnordered, dl, VT,
+ OpVT);
+ case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl,
+ VT, OpVT);
+ case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT,
+ OpVT);
}
} else {
// Ensure that the constant occurs on the RHS.
@@ -2297,10 +2396,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
- // Support big-endian targets when it becomes useful.
bool IsLE = getDataLayout().isLittleEndian();
- if (!IsLE)
- break;
// Bitcast 'small element' vector to 'large element' scalar/vector.
if ((BitWidth % SubBitWidth) == 0) {
@@ -2319,8 +2415,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
for (unsigned i = 0; i != SubScale; ++i) {
computeKnownBits(N0, Known2, SubDemandedElts.shl(i),
Depth + 1);
- Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * i);
- Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * i);
+ unsigned Shifts = IsLE ? i : SubScale - 1 - i;
+ Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * Shifts);
+ Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * Shifts);
}
}
@@ -2342,7 +2439,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
Known.Zero.setAllBits(); Known.One.setAllBits();
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
- unsigned Offset = (i % SubScale) * BitWidth;
+ unsigned Shifts = IsLE ? i : NumElts - 1 - i;
+ unsigned Offset = (Shifts % SubScale) * BitWidth;
Known.One &= Known2.One.lshr(Offset).trunc(BitWidth);
Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth);
// If we don't know any bits, early out.
@@ -2441,6 +2539,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
case ISD::SMULO:
case ISD::UMULO:
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
if (Op.getResNo() != 1)
break;
// The boolean result conforms to getBooleanContents.
@@ -2904,11 +3003,38 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::SMIN:
case ISD::SMAX: {
- computeKnownBits(Op.getOperand(0), Known, DemandedElts,
- Depth + 1);
- // If we don't know any bits, early out.
- if (Known.isUnknown())
- break;
+ // If we have a clamp pattern, we know that the number of sign bits will be
+ // the minimum of the clamp min/max range.
+ bool IsMax = (Opcode == ISD::SMAX);
+ ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr;
+ if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)))
+ if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX))
+ CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1),
+ DemandedElts);
+ if (CstLow && CstHigh) {
+ if (!IsMax)
+ std::swap(CstLow, CstHigh);
+
+ const APInt &ValueLow = CstLow->getAPIntValue();
+ const APInt &ValueHigh = CstHigh->getAPIntValue();
+ if (ValueLow.sle(ValueHigh)) {
+ unsigned LowSignBits = ValueLow.getNumSignBits();
+ unsigned HighSignBits = ValueHigh.getNumSignBits();
+ unsigned MinSignBits = std::min(LowSignBits, HighSignBits);
+ if (ValueLow.isNegative() && ValueHigh.isNegative()) {
+ Known.One.setHighBits(MinSignBits);
+ break;
+ }
+ if (ValueLow.isNonNegative() && ValueHigh.isNonNegative()) {
+ Known.Zero.setHighBits(MinSignBits);
+ break;
+ }
+ }
+ }
+
+ // Fallback - just get the shared known bits of the operands.
+ computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ if (Known.isUnknown()) break; // Early-out
computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
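A worked instance of the clamp reasoning above, assuming i8 elements and the pattern smax(smin(x, -2), -64):

// ValueLow  = -64 = 0b11000000 -> 2 leading sign bits
// ValueHigh =  -2 = 0b11111110 -> 7 leading sign bits
// MinSignBits = min(2, 7) = 2; both bounds are negative, so every value in
// [-64, -2] has its top 2 bits set and Known.One.setHighBits(2) is sound.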
@@ -3038,7 +3164,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
if (!DemandedElts)
return 1; // No demanded elts, better to assume we don't know anything.
- switch (Op.getOpcode()) {
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
default: break;
case ISD::AssertSext:
Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
@@ -3189,7 +3316,32 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return std::min(Tmp, Tmp2);
case ISD::SMIN:
- case ISD::SMAX:
+ case ISD::SMAX: {
+ // If we have a clamp pattern, we know that the number of sign bits will be
+ // the minimum of the clamp min/max range.
+ bool IsMax = (Opcode == ISD::SMAX);
+ ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr;
+ if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)))
+ if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX))
+ CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1),
+ DemandedElts);
+ if (CstLow && CstHigh) {
+ if (!IsMax)
+ std::swap(CstLow, CstHigh);
+ if (CstLow->getAPIntValue().sle(CstHigh->getAPIntValue())) {
+ Tmp = CstLow->getAPIntValue().getNumSignBits();
+ Tmp2 = CstHigh->getAPIntValue().getNumSignBits();
+ return std::min(Tmp, Tmp2);
+ }
+ }
+
+ // Fallback - just get the minimum number of sign bits of the operands.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (Tmp == 1)
+ return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+ return std::min(Tmp, Tmp2);
+ }
case ISD::UMIN:
case ISD::UMAX:
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
@@ -3225,7 +3377,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
unsigned RotAmt = C->getAPIntValue().urem(VTBits);
// Handle rotate right by N like a rotate left by 32-N.
- if (Op.getOpcode() == ISD::ROTR)
+ if (Opcode == ISD::ROTR)
RotAmt = (VTBits - RotAmt) % VTBits;
// If we aren't rotating out all of the known-in sign bits, return the
@@ -3423,10 +3575,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
}
// Allow the target to implement this method for its nodes.
- if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
- Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
- Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
- Op.getOpcode() == ISD::INTRINSIC_VOID) {
+ if (Opcode >= ISD::BUILTIN_OP_END ||
+ Opcode == ISD::INTRINSIC_WO_CHAIN ||
+ Opcode == ISD::INTRINSIC_W_CHAIN ||
+ Opcode == ISD::INTRINSIC_VOID) {
unsigned NumBits =
TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth);
if (NumBits > 1)
@@ -3487,17 +3639,33 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
return false;
}
-bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
+ assert(Op.getValueType().isFloatingPoint() &&
+ "Floating point type expected");
+
// If the value is a constant, we can obviously see if it is a zero or not.
+ // TODO: Add BuildVector support.
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
return !C->isZero();
+ return false;
+}
+
+bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+ assert(!Op.getValueType().isFloatingPoint() &&
+ "Floating point types unsupported - use isKnownNeverZeroFloat");
+
+ // If the value is a constant, we can obviously see if it is a zero or not.
+ if (ISD::matchUnaryPredicate(
+ Op, [](ConstantSDNode *C) { return !C->isNullValue(); }))
+ return true;
// TODO: Recognize more cases here.
switch (Op.getOpcode()) {
default: break;
case ISD::OR:
- if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
- return !C->isNullValue();
+ if (isKnownNeverZero(Op.getOperand(1)) ||
+ isKnownNeverZero(Op.getOperand(0)))
+ return true;
break;
}
@@ -3517,6 +3685,8 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
return false;
}
+// FIXME: unify with llvm::haveNoCommonBitsSet.
+// FIXME: could also handle masked merge pattern (X & ~M) op (Y & M)
bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
assert(A.getValueType() == B.getValueType() &&
"Values must have the same type");
@@ -3841,11 +4011,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
else if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
- // (ext (trunx x)) -> x
+ // (ext (trunc x)) -> x
if (OpOpcode == ISD::TRUNCATE) {
SDValue OpOp = Operand.getOperand(0);
- if (OpOp.getValueType() == VT)
+ if (OpOp.getValueType() == VT) {
+ transferDbgValues(Operand, OpOp);
return OpOp;
+ }
}
break;
case ISD::TRUNCATE:
@@ -3921,10 +4093,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
case ISD::FNEG:
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
- if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
- // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags?
+ if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) &&
+ OpOpcode == ISD::FSUB)
return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1),
- Operand.getOperand(0), Operand.getNode()->getFlags());
+ Operand.getOperand(0), Flags);
if (OpOpcode == ISD::FNEG) // --X -> X
return Operand.getOperand(0);
break;
@@ -4314,24 +4486,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
- if (getTarget().Options.UnsafeFPMath) {
- if (Opcode == ISD::FADD) {
- // x+0 --> x
- if (N2CFP && N2CFP->getValueAPF().isZero())
- return N1;
- } else if (Opcode == ISD::FSUB) {
- // x-0 --> x
- if (N2CFP && N2CFP->getValueAPF().isZero())
- return N1;
- } else if (Opcode == ISD::FMUL) {
- // x*0 --> 0
- if (N2CFP && N2CFP->isZero())
- return N2;
- // x*1 --> x
- if (N2CFP && N2CFP->isExactlyValue(1.0))
- return N1;
- }
- }
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
@@ -4448,12 +4602,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
}
case ISD::EXTRACT_VECTOR_ELT:
+ assert(VT.getSizeInBits() >= N1.getValueType().getScalarSizeInBits() &&
+ "The result of EXTRACT_VECTOR_ELT must be at least as wide as the \
+ element type of the vector.");
+
// EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.
if (N1.isUndef())
return getUNDEF(VT);
// EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF
- if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements())
+ if (N2C && N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements()))
return getUNDEF(VT);
// EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
@@ -4635,6 +4793,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
}
+ // Any FP binop with an undef operand is folded to NaN. This matches the
+ // behavior of the IR optimizer.
+ switch (Opcode) {
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (N1.isUndef() || N2.isUndef())
+ return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT);
+ }
+
// Canonicalize an UNDEF to the RHS, even over a constant.
if (N1.isUndef()) {
if (TLI->isCommutativeBinOp(Opcode)) {
@@ -4644,22 +4814,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::FP_ROUND_INREG:
case ISD::SIGN_EXTEND_INREG:
case ISD::SUB:
- case ISD::FSUB:
- case ISD::FDIV:
- case ISD::FREM:
- case ISD::SRA:
- return N1; // fold op(undef, arg2) -> undef
+ return getUNDEF(VT); // fold op(undef, arg2) -> undef
case ISD::UDIV:
case ISD::SDIV:
case ISD::UREM:
case ISD::SREM:
+ case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
- if (!VT.isVector())
- return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0
- // For vectors, we can't easily build an all zero vector, just return
- // the LHS.
- return N2;
+ return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0
}
}
}
@@ -4681,32 +4844,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::SDIV:
case ISD::UREM:
case ISD::SREM:
- return N2; // fold op(arg1, undef) -> undef
- case ISD::FADD:
- case ISD::FSUB:
- case ISD::FMUL:
- case ISD::FDIV:
- case ISD::FREM:
- if (getTarget().Options.UnsafeFPMath)
- return N2;
- break;
- case ISD::MUL:
- case ISD::AND:
+ case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
- if (!VT.isVector())
- return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0
- // For vectors, we can't easily build an all zero vector, just return
- // the LHS.
- return N1;
+ return getUNDEF(VT); // fold op(arg1, undef) -> undef
+ case ISD::MUL:
+ case ISD::AND:
+ return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0
case ISD::OR:
- if (!VT.isVector())
- return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT);
- // For vectors, we can't easily build an all one vector, just return
- // the LHS.
- return N1;
- case ISD::SRA:
- return N1;
+ return getAllOnesConstant(DL, VT);
}
}
@@ -4739,10 +4885,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
- SDValue N1, SDValue N2, SDValue N3) {
+ SDValue N1, SDValue N2, SDValue N3,
+ const SDNodeFlags Flags) {
// Perform various simplifications.
switch (Opcode) {
case ISD::FMA: {
+ assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
+ assert(N1.getValueType() == VT && N2.getValueType() == VT &&
+ N3.getValueType() == VT && "FMA types must match!");
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3);
@@ -4833,10 +4983,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ E->intersectFlagsWith(Flags);
return SDValue(E, 0);
+ }
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ N->setFlags(Flags);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
@@ -5107,6 +5260,31 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
return MF.getFunction().optForSize();
}
+static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
+ SmallVector<SDValue, 32> &OutChains, unsigned From,
+ unsigned To, SmallVector<SDValue, 16> &OutLoadChains,
+ SmallVector<SDValue, 16> &OutStoreChains) {
+ assert(OutLoadChains.size() && "Missing loads in memcpy inlining");
+ assert(OutStoreChains.size() && "Missing stores in memcpy inlining");
+ SmallVector<SDValue, 16> GluedLoadChains;
+ for (unsigned i = From; i < To; ++i) {
+ OutChains.push_back(OutLoadChains[i]);
+ GluedLoadChains.push_back(OutLoadChains[i]);
+ }
+
+ // Build a single token factor chaining all of the loads.
+ SDValue LoadToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ GluedLoadChains);
+
+ for (unsigned i = From; i < To; ++i) {
+ StoreSDNode *ST = dyn_cast<StoreSDNode>(OutStoreChains[i]);
+ SDValue NewStore = DAG.getTruncStore(LoadToken, dl, ST->getValue(),
+ ST->getBasePtr(), ST->getMemoryVT(),
+ ST->getMemOperand());
+ OutChains.push_back(NewStore);
+ }
+}
+
static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
uint64_t Size, unsigned Align,
@@ -5171,7 +5349,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
MachineMemOperand::Flags MMOFlags =
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
- SmallVector<SDValue, 8> OutChains;
+ SmallVector<SDValue, 16> OutLoadChains;
+ SmallVector<SDValue, 16> OutStoreChains;
+ SmallVector<SDValue, 32> OutChains;
unsigned NumMemOps = MemOps.size();
uint64_t SrcOff = 0, DstOff = 0;
for (unsigned i = 0; i != NumMemOps; ++i) {
@@ -5205,11 +5385,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SubSlice.Length = VTSize;
}
Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice);
- if (Value.getNode())
+ if (Value.getNode()) {
Store = DAG.getStore(Chain, dl, Value,
DAG.getMemBasePlusOffset(Dst, DstOff, dl),
DstPtrInfo.getWithOffset(DstOff), Align,
MMOFlags);
+ OutChains.push_back(Store);
+ }
}
if (!Store.getNode()) {
@@ -5231,17 +5413,61 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
DAG.getMemBasePlusOffset(Src, SrcOff, dl),
SrcPtrInfo.getWithOffset(SrcOff), VT,
MinAlign(SrcAlign, SrcOff), SrcMMOFlags);
- OutChains.push_back(Value.getValue(1));
+ OutLoadChains.push_back(Value.getValue(1));
+
Store = DAG.getTruncStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags);
+ OutStoreChains.push_back(Store);
}
- OutChains.push_back(Store);
SrcOff += VTSize;
DstOff += VTSize;
Size -= VTSize;
}
+ unsigned GluedLdStLimit = MaxLdStGlue == 0 ?
+ TLI.getMaxGluedStoresPerMemcpy() : MaxLdStGlue;
+ unsigned NumLdStInMemcpy = OutStoreChains.size();
+
+ if (NumLdStInMemcpy) {
+ // A memcpy of constants may have been converted to a memset, in which
+ // case there are only stores and no loads. In the absence of loads,
+ // there is nothing to gang up.
+ if ((GluedLdStLimit <= 1) || !EnableMemCpyDAGOpt) {
+ // If the target does not care, just leave things as they are.
+ for (unsigned i = 0; i < NumLdStInMemcpy; ++i) {
+ OutChains.push_back(OutLoadChains[i]);
+ OutChains.push_back(OutStoreChains[i]);
+ }
+ } else {
+ // Ld/St count is within the limit set by the target.
+ if (NumLdStInMemcpy <= GluedLdStLimit) {
+ chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0,
+ NumLdStInMemcpy, OutLoadChains,
+ OutStoreChains);
+ } else {
+ unsigned NumberLdChain = NumLdStInMemcpy / GluedLdStLimit;
+ unsigned RemainingLdStInMemcpy = NumLdStInMemcpy % GluedLdStLimit;
+ unsigned GlueIter = 0;
+
+ for (unsigned cnt = 0; cnt < NumberLdChain; ++cnt) {
+ unsigned IndexFrom = NumLdStInMemcpy - GlueIter - GluedLdStLimit;
+ unsigned IndexTo = NumLdStInMemcpy - GlueIter;
+
+ chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, IndexFrom, IndexTo,
+ OutLoadChains, OutStoreChains);
+ GlueIter += GluedLdStLimit;
+ }
+
+ // Residual ld/st.
+ if (RemainingLdStInMemcpy) {
+ chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0,
+ RemainingLdStInMemcpy, OutLoadChains,
+ OutStoreChains);
+ }
+ }
+ }
+ }
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
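The chunking above walks the load/store pairs from the back in groups of GluedLdStLimit and then chains the residue at the front. A small worked example with assumed numbers:

// NumLdStInMemcpy = 10, GluedLdStLimit = 4:
//   NumberLdChain         = 10 / 4 = 2
//   RemainingLdStInMemcpy = 10 % 4 = 2
// Iteration 1: IndexFrom = 10 - 0 - 4 = 6, IndexTo = 10 -> chain [6, 10)
// Iteration 2: IndexFrom = 10 - 4 - 4 = 2, IndexTo = 6  -> chain [2, 6)
// Residual:                                                chain [0, 2)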
@@ -5334,7 +5560,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
-/// \brief Lower the call to 'memset' intrinsic function into a series of store
+/// Lower the call to 'memset' intrinsic function into a series of store
/// operations.
///
/// \param DAG Selection DAG where lowered code is placed.
@@ -5518,6 +5744,47 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
return CallResult.second;
}
+SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl,
+ SDValue Dst, unsigned DstAlign,
+ SDValue Src, unsigned SrcAlign,
+ SDValue Size, Type *SizeTy,
+ unsigned ElemSz, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+
+ Entry.Node = Src;
+ Args.push_back(Entry);
+
+ Entry.Ty = SizeTy;
+ Entry.Node = Size;
+ Args.push_back(Entry);
+
+ RTLIB::Libcall LibraryCall =
+ RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElemSz);
+ if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported element size");
+
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(LibraryCall),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
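getAtomicMemcpy always lowers to the matching __llvm_memcpy_element_unordered_atomic_N libcall and aborts if RTLIB has no entry for the element size. A hypothetical call site, with all surrounding values (DAG, dl, Chain, Dst, Src, Size, SizeTy) assumed in scope:

// Lower an element-unordered-atomic memcpy with 4-byte elements.
SDValue NewChain = DAG.getAtomicMemcpy(
    Chain, dl, Dst, /*DstAlign=*/4, Src, /*SrcAlign=*/4, Size, SizeTy,
    /*ElemSz=*/4, /*isTailCall=*/false,
    MachinePointerInfo(), MachinePointerInfo());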
SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Src, SDValue Size, unsigned Align,
bool isVol, bool isTailCall,
@@ -5579,6 +5846,47 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
return CallResult.second;
}
+SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl,
+ SDValue Dst, unsigned DstAlign,
+ SDValue Src, unsigned SrcAlign,
+ SDValue Size, Type *SizeTy,
+ unsigned ElemSz, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+
+ Entry.Node = Src;
+ Args.push_back(Entry);
+
+ Entry.Ty = SizeTy;
+ Entry.Node = Size;
+ Args.push_back(Entry);
+
+ RTLIB::Libcall LibraryCall =
+ RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElemSz);
+ if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported element size");
+
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(LibraryCall),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Src, SDValue Size, unsigned Align,
bool isVol, bool isTailCall,
@@ -5641,6 +5949,46 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
return CallResult.second;
}
+SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl,
+ SDValue Dst, unsigned DstAlign,
+ SDValue Value, SDValue Size, Type *SizeTy,
+ unsigned ElemSz, bool isTailCall,
+ MachinePointerInfo DstPtrInfo) {
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+
+ Entry.Ty = Type::getInt8Ty(*getContext());
+ Entry.Node = Value;
+ Args.push_back(Entry);
+
+ Entry.Ty = SizeTy;
+ Entry.Node = Size;
+ Args.push_back(Entry);
+
+ RTLIB::Libcall LibraryCall =
+ RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElemSz);
+ if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported element size");
+
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI->getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(LibraryCall),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDVTList VTList, ArrayRef<SDValue> Ops,
MachineMemOperand *MMO) {
@@ -5736,6 +6084,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
Opcode == ISD::ATOMIC_LOAD_SUB ||
Opcode == ISD::ATOMIC_LOAD_AND ||
+ Opcode == ISD::ATOMIC_LOAD_CLR ||
Opcode == ISD::ATOMIC_LOAD_OR ||
Opcode == ISD::ATOMIC_LOAD_XOR ||
Opcode == ISD::ATOMIC_LOAD_NAND ||
@@ -6207,7 +6556,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
MachineMemOperand *MMO) {
- assert(Ops.size() == 5 && "Incompatible number of operands");
+ assert(Ops.size() == 6 && "Incompatible number of operands");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops);
@@ -6233,6 +6582,9 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
assert(N->getIndex().getValueType().getVectorNumElements() ==
N->getValueType(0).getVectorNumElements() &&
"Vector width mismatch between index and data");
+ assert(isa<ConstantSDNode>(N->getScale()) &&
+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ "Scale should be a constant power of 2");
CSEMap.InsertNode(N, IP);
InsertNode(N);
@@ -6244,7 +6596,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
MachineMemOperand *MMO) {
- assert(Ops.size() == 5 && "Incompatible number of operands");
+ assert(Ops.size() == 6 && "Incompatible number of operands");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops);
@@ -6267,6 +6619,9 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
assert(N->getIndex().getValueType().getVectorNumElements() ==
N->getValue().getValueType().getVectorNumElements() &&
"Vector width mismatch between index and data");
+ assert(isa<ConstantSDNode>(N->getScale()) &&
+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ "Scale should be a constant power of 2");
CSEMap.InsertNode(N, IP);
InsertNode(N);
@@ -6558,6 +6913,7 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) {
// Now we update the operands.
N->OperandList[0].set(Op);
+ updateDivergence(N);
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
return N;
@@ -6586,6 +6942,7 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {
if (N->OperandList[1] != Op2)
N->OperandList[1].set(Op2);
+ updateDivergence(N);
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
return N;
@@ -6636,6 +6993,7 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) {
if (N->OperandList[i] != Ops[i])
N->OperandList[i].set(Ops[i]);
+ updateDivergence(N);
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
return N;
@@ -7061,11 +7419,24 @@ SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var,
/// FrameIndex
SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var,
DIExpression *Expr, unsigned FI,
+ bool IsIndirect,
const DebugLoc &DL,
unsigned O) {
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
- return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, DL, O);
+ return new (DbgInfo->getAlloc())
+ SDDbgValue(Var, Expr, FI, IsIndirect, DL, O, SDDbgValue::FRAMEIX);
+}
+
+/// VReg
+SDDbgValue *SelectionDAG::getVRegDbgValue(DIVariable *Var,
+ DIExpression *Expr,
+ unsigned VReg, bool IsIndirect,
+ const DebugLoc &DL, unsigned O) {
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc())
+ SDDbgValue(Var, Expr, VReg, IsIndirect, DL, O, SDDbgValue::VREG);
}
void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
@@ -7155,8 +7526,9 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());
ClonedDVs.push_back(Clone);
DV->setIsInvalidated();
- DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this);
- dbgs() << " into " << *DIExpr << '\n');
+ LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting";
+ N0.getNode()->dumprFull(this);
+ dbgs() << " into " << *DIExpr << '\n');
}
}
}
@@ -7165,6 +7537,14 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
AddDbgValue(Dbg, Dbg->getSDNode(), false);
}
+/// Creates a SDDbgLabel node.
+SDDbgLabel *SelectionDAG::getDbgLabel(DILabel *Label,
+ const DebugLoc &DL, unsigned O) {
+ assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc()) SDDbgLabel(Label, DL, O);
+}
+
namespace {
/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node
@@ -7227,8 +7607,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
SDUse &Use = UI.getUse();
++UI;
Use.set(To);
+ if (To->isDivergent() != From->isDivergent())
+ updateDivergence(User);
} while (UI != UE && *UI == User);
-
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User);
@@ -7282,6 +7663,8 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
SDUse &Use = UI.getUse();
++UI;
Use.setNode(To);
+ if (To->isDivergent() != From->isDivergent())
+ updateDivergence(User);
} while (UI != UE && *UI == User);
// Now that we have modified User, add it back to the CSE maps. If it
@@ -7326,8 +7709,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
const SDValue &ToOp = To[Use.getResNo()];
++UI;
Use.set(ToOp);
+ if (To->getNode()->isDivergent() != From->isDivergent())
+ updateDivergence(User);
} while (UI != UE && *UI == User);
-
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User);
@@ -7385,8 +7769,9 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
++UI;
Use.set(To);
+ if (To->isDivergent() != From->isDivergent())
+ updateDivergence(User);
} while (UI != UE && *UI == User);
-
// We are iterating over all uses of the From node, so if a use
// doesn't use the specific value, no changes are made.
if (!UserRemovedFromCSEMaps)
@@ -7419,6 +7804,72 @@ namespace {
} // end anonymous namespace
+void SelectionDAG::updateDivergence(SDNode * N)
+{
+ if (TLI->isSDNodeAlwaysUniform(N))
+ return;
+ bool IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA);
+ for (auto &Op : N->ops()) {
+ if (Op.Val.getValueType() != MVT::Other)
+ IsDivergent |= Op.getNode()->isDivergent();
+ }
+ if (N->SDNodeBits.IsDivergent != IsDivergent) {
+ N->SDNodeBits.IsDivergent = IsDivergent;
+ for (auto U : N->uses()) {
+ updateDivergence(U);
+ }
+ }
+}
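Because the recursion into uses() only happens when the divergence bit actually flips, repeated operand updates converge to a fixed point instead of re-walking the whole DAG. A hypothetical propagation, as comments:

// Replacing a uniform operand of %a with a divergent value flips %a's bit,
// which in turn flips its user %b; propagation stops at user %c, whose other
// operand was already divergent (no change in %c's bit, so no recursion).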
+
+
+void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) {
+ DenseMap<SDNode *, unsigned> Degree;
+ Order.reserve(AllNodes.size());
+ for (auto & N : allnodes()) {
+ unsigned NOps = N.getNumOperands();
+ Degree[&N] = NOps;
+ if (0 == NOps)
+ Order.push_back(&N);
+ }
+ for (std::vector<SDNode *>::iterator I = Order.begin();
+ I!=Order.end();++I) {
+ SDNode * N = *I;
+ for (auto U : N->uses()) {
+ unsigned &UnsortedOps = Degree[U];
+ if (0 == --UnsortedOps)
+ Order.push_back(U);
+ }
+ }
+}
+
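CreateTopologicalOrder is Kahn's algorithm over the use lists: every node starts with an in-degree equal to its operand count, zero-degree nodes seed the order, and each emitted node decrements its users. Note that the loop pushes onto Order while iterating it, which is only safe because the up-front reserve(AllNodes.size()) rules out reallocation. A usage sketch with a hypothetical caller:

std::vector<SDNode *> TopoOrder;
DAG.CreateTopologicalOrder(TopoOrder);
for (SDNode *N : TopoOrder) {
  // All operands of N appear in TopoOrder before N itself.
}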
+void SelectionDAG::VerifyDAGDiverence()
+{
+ std::vector<SDNode*> TopoOrder;
+ CreateTopologicalOrder(TopoOrder);
+ const TargetLowering &TLI = getTargetLoweringInfo();
+ DenseMap<const SDNode *, bool> DivergenceMap;
+ for (auto &N : allnodes()) {
+ DivergenceMap[&N] = false;
+ }
+ for (auto N : TopoOrder) {
+ bool IsDivergent = DivergenceMap[N];
+ bool IsSDNodeDivergent = TLI.isSDNodeSourceOfDivergence(N, FLI, DA);
+ for (auto &Op : N->ops()) {
+ if (Op.Val.getValueType() != MVT::Other)
+ IsSDNodeDivergent |= DivergenceMap[Op.getNode()];
+ }
+ if (!IsDivergent && IsSDNodeDivergent && !TLI.isSDNodeAlwaysUniform(N)) {
+ DivergenceMap[N] = true;
+ }
+ }
+ for (auto &N : allnodes()) {
+ (void)N;
+ assert(DivergenceMap[&N] == N.isDivergent() &&
+ "Divergence bit inconsistency detected\n");
+ }
+}
+
+
/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
/// uses of other values produced by From.getNode() alone. The same value
/// may appear in both the From and To list. The Deleted vector is
@@ -7450,7 +7901,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
}
// Sort the uses, so that all the uses from a given User are together.
- std::sort(Uses.begin(), Uses.end());
+ llvm::sort(Uses.begin(), Uses.end());
for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
UseIndex != UseIndexEnd; ) {
@@ -7579,6 +8030,10 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
DbgInfo->add(DB, SD, isParameter);
}
+void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) {
+ DbgInfo->add(DB);
+}
+
SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
SDValue NewMemOp) {
assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
@@ -7947,11 +8402,8 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
if (VT.getSizeInBits() / 8 != Bytes)
return false;
- SDValue Loc = LD->getOperand(1);
- SDValue BaseLoc = Base->getOperand(1);
-
- auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this);
- auto LocDecomp = BaseIndexOffset::match(Loc, *this);
+ auto BaseLocDecomp = BaseIndexOffset::match(Base, *this);
+ auto LocDecomp = BaseIndexOffset::match(LD, *this);
int64_t Offset = 0;
if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset))
@@ -7966,8 +8418,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
const GlobalValue *GV;
int64_t GVOffset = 0;
if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
- unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType());
- KnownBits Known(PtrWidth);
+ unsigned IdxWidth = getDataLayout().getIndexTypeSizeInBits(GV->getType());
+ KnownBits Known(IdxWidth);
llvm::computeKnownBits(GV, Known, getDataLayout());
unsigned AlignBits = Known.countMinTrailingZeros();
unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
@@ -8201,7 +8653,7 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
return true;
}
-// \brief Returns the SDNode if it is a constant integer BuildVector
+// Returns the SDNode if it is a constant integer BuildVector
// or constant integer.
SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) {
if (isa<ConstantSDNode>(N))
@@ -8227,6 +8679,26 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {
return nullptr;
}
+void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
+ assert(!Node->OperandList && "Node already has operands");
+ SDUse *Ops = OperandRecycler.allocate(
+ ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator);
+
+ bool IsDivergent = false;
+ for (unsigned I = 0; I != Vals.size(); ++I) {
+ Ops[I].setUser(Node);
+ Ops[I].setInitial(Vals[I]);
+ if (Ops[I].Val.getValueType() != MVT::Other) // Skip Chain. It does not carry divergence.
+ IsDivergent = IsDivergent || Ops[I].getNode()->isDivergent();
+ }
+ Node->NumOperands = Vals.size();
+ Node->OperandList = Ops;
+ IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA);
+ if (!TLI->isSDNodeAlwaysUniform(Node))
+ Node->SDNodeBits.IsDivergent = IsDivergent;
+ checkForCycles(Node);
+}
+
#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
SmallPtrSetImpl<const SDNode*> &Visited,
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index d5980919d03c..c859f16e74fe 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -21,6 +21,9 @@ using namespace llvm;
bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
const SelectionDAG &DAG, int64_t &Off) {
+ // Conservatively fail if a prior match failed.
+ if (!Base.getNode() || !Other.Base.getNode())
+ return false;
// Initial Offset difference.
Off = Other.Offset - Offset;
@@ -72,24 +75,67 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
}
/// Parses tree in Ptr for base, index, offset addresses.
-BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) {
+BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N,
+ const SelectionDAG &DAG) {
+ SDValue Ptr = N->getBasePtr();
+
// (((B + I*M) + c)) + c ...
SDValue Base = DAG.getTargetLoweringInfo().unwrapAddress(Ptr);
SDValue Index = SDValue();
int64_t Offset = 0;
bool IsIndexSignExt = false;
+ // Pre-inc/pre-dec ops are components of the effective address.
+ if (N->getAddressingMode() == ISD::PRE_INC) {
+ if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
+ Offset += C->getSExtValue();
+ else // If unknown, give up now.
+ return BaseIndexOffset(SDValue(), SDValue(), 0, false);
+ } else if (N->getAddressingMode() == ISD::PRE_DEC) {
+ if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
+ Offset -= C->getSExtValue();
+ else // If unknown, give up now.
+ return BaseIndexOffset(SDValue(), SDValue(), 0, false);
+ }
+
// Consume constant adds & ors with appropriate masking.
- while (Base->getOpcode() == ISD::ADD || Base->getOpcode() == ISD::OR) {
- if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) {
+ while (true) {
+ switch (Base->getOpcode()) {
+ case ISD::OR:
// Only consider ORs which act as adds.
- if (Base->getOpcode() == ISD::OR &&
- !DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue()))
- break;
- Offset += C->getSExtValue();
- Base = Base->getOperand(0);
- continue;
+ if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1)))
+ if (DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue())) {
+ Offset += C->getSExtValue();
+ Base = Base->getOperand(0);
+ continue;
+ }
+ break;
+ case ISD::ADD:
+ if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) {
+ Offset += C->getSExtValue();
+ Base = Base->getOperand(0);
+ continue;
+ }
+ break;
+ case ISD::LOAD:
+ case ISD::STORE: {
+ auto *LSBase = cast<LSBaseSDNode>(Base.getNode());
+ unsigned int IndexResNo = (Base->getOpcode() == ISD::LOAD) ? 1 : 0;
+ if (LSBase->isIndexed() && Base.getResNo() == IndexResNo)
+ if (auto *C = dyn_cast<ConstantSDNode>(LSBase->getOffset())) {
+ auto Off = C->getSExtValue();
+ if (LSBase->getAddressingMode() == ISD::PRE_DEC ||
+ LSBase->getAddressingMode() == ISD::POST_DEC)
+ Offset -= Off;
+ else
+ Offset += Off;
+ Base = LSBase->getBasePtr();
+ continue;
+ }
+ break;
+ }
}
+ // If we get here, break out of the loop.
break;
}
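The rewritten walk peels address components until it reaches a stable base: constant adds, ORs that provably act as adds, and the pointer results of indexed loads/stores whose pre/post increment folds into the running offset. Two hypothetical decompositions, as comments:

// (add (add %B, 16), 8)  ->  Base = %B, Offset = 24
// (or %B, 4)             ->  Base = %B, Offset = 4, but only when the low
//                            bits of %B are known zero so the OR is an ADD.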
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 71cb8cb78f6d..1aa8df29af3b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "SelectionDAGBuilder.h"
+#include "SDNodeDbgValue.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
@@ -49,7 +50,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -102,6 +102,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -777,8 +778,8 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
EVT ValueVT = ValueVTs[Value];
unsigned NumRegs = RegCount[Value];
MVT RegisterVT = IsABIMangled
- ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
- : RegVTs[Value];
+ ? TLI.getRegisterTypeForCallingConv(*DAG.getContext(), RegVTs[Value])
+ : RegVTs[Value];
Parts.resize(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
@@ -818,32 +819,15 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
// FIXME: We capture more information than the dag can represent. For
// now, just use the tightest assertzext/assertsext possible.
- bool isSExt = true;
+ bool isSExt;
EVT FromVT(MVT::Other);
- if (NumSignBits == RegSize) {
- isSExt = true; // ASSERT SEXT 1
- FromVT = MVT::i1;
- } else if (NumZeroBits >= RegSize - 1) {
- isSExt = false; // ASSERT ZEXT 1
- FromVT = MVT::i1;
- } else if (NumSignBits > RegSize - 8) {
- isSExt = true; // ASSERT SEXT 8
- FromVT = MVT::i8;
- } else if (NumZeroBits >= RegSize - 8) {
- isSExt = false; // ASSERT ZEXT 8
- FromVT = MVT::i8;
- } else if (NumSignBits > RegSize - 16) {
- isSExt = true; // ASSERT SEXT 16
- FromVT = MVT::i16;
- } else if (NumZeroBits >= RegSize - 16) {
- isSExt = false; // ASSERT ZEXT 16
- FromVT = MVT::i16;
- } else if (NumSignBits > RegSize - 32) {
- isSExt = true; // ASSERT SEXT 32
- FromVT = MVT::i32;
- } else if (NumZeroBits >= RegSize - 32) {
- isSExt = false; // ASSERT ZEXT 32
- FromVT = MVT::i32;
+ if (NumZeroBits) {
+ FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumZeroBits);
+ isSExt = false;
+ } else if (NumSignBits > 1) {
+ FromVT =
+ EVT::getIntegerVT(*DAG.getContext(), RegSize - NumSignBits + 1);
+ isSExt = true;
} else {
continue;
}
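Rather than probing a fixed i1/i8/i16/i32 ladder, the replacement derives the tightest assertion type directly from the bit counts. Worked instances, assuming a 32-bit register:

// NumZeroBits = 24 -> FromVT = i8 (32 - 24), AssertZext: top 24 bits are zero.
// NumSignBits = 25 -> FromVT = i8 (32 - 25 + 1), AssertSext.
// Neither known    -> fall through to continue; no assertion is emitted.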
@@ -876,8 +860,8 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
unsigned NumParts = RegCount[Value];
MVT RegisterVT = IsABIMangled
- ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
- : RegVTs[Value];
+ ? TLI.getRegisterTypeForCallingConv(*DAG.getContext(), RegVTs[Value])
+ : RegVTs[Value];
if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
ExtendKind = ISD::ZERO_EXTEND;
@@ -970,6 +954,20 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
}
}
+SmallVector<std::pair<unsigned, unsigned>, 4>
+RegsForValue::getRegsAndSizes() const {
+ SmallVector<std::pair<unsigned, unsigned>, 4> OutVec;
+ unsigned I = 0;
+ for (auto CountAndVT : zip_first(RegCount, RegVTs)) {
+ unsigned RegCount = std::get<0>(CountAndVT);
+ MVT RegisterVT = std::get<1>(CountAndVT);
+ unsigned RegisterSize = RegisterVT.getSizeInBits();
+ for (unsigned E = I + RegCount; I != E; ++I)
+ OutVec.push_back(std::make_pair(Regs[I], RegisterSize));
+ }
+ return OutVec;
+}
+
void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
const TargetLibraryInfo *li) {
AA = aa;
@@ -1054,6 +1052,22 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
visit(I.getOpcode(), I);
+ if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) {
+ // Propagate the fast-math-flags of this IR instruction to the DAG node that
+ // maps to this instruction.
+ // TODO: We could handle all flags (nsw, etc) here.
+ // TODO: If an IR instruction maps to >1 node, only the final node will have
+ // flags set.
+ if (SDNode *Node = getNodeForIRValue(&I)) {
+ SDNodeFlags IncomingFlags;
+ IncomingFlags.copyFMF(*FPMO);
+ if (!Node->getFlags().isDefined())
+ Node->setFlags(IncomingFlags);
+ else
+ Node->intersectFlagsWith(IncomingFlags);
+ }
+ }
+
if (!isa<TerminatorInst>(&I) && !HasTailCall &&
!isStatepoint(&I)) // statepoints handle their exports internally
CopyToExportRegsIfNeeded(&I);
@@ -1077,14 +1091,39 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
}
}
+void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
+ const DIExpression *Expr) {
+ auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) {
+ const DbgValueInst *DI = DDI.getDI();
+ DIVariable *DanglingVariable = DI->getVariable();
+ DIExpression *DanglingExpr = DI->getExpression();
+ if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) {
+ LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << *DI << "\n");
+ return true;
+ }
+ return false;
+ };
+
+ for (auto &DDIMI : DanglingDebugInfoMap) {
+ DanglingDebugInfoVector &DDIV = DDIMI.second;
+ DDIV.erase(remove_if(DDIV, isMatchingDbgValue), DDIV.end());
+ }
+}
+
// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
SDValue Val) {
- DanglingDebugInfo &DDI = DanglingDebugInfoMap[V];
- if (DDI.getDI()) {
+ auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(V);
+ if (DanglingDbgInfoIt == DanglingDebugInfoMap.end())
+ return;
+
+ DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second;
+ for (auto &DDI : DDIV) {
const DbgValueInst *DI = DDI.getDI();
+ assert(DI && "Ill-formed DanglingDebugInfo");
DebugLoc dl = DDI.getdl();
+ unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
DILocalVariable *Variable = DI->getVariable();
DIExpression *Expr = DI->getExpression();
@@ -1093,13 +1132,26 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
SDDbgValue *SDV;
if (Val.getNode()) {
if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
- SDV = getDbgValue(Val, Variable, Expr, dl, DbgSDNodeOrder);
+ LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
+ << DbgSDNodeOrder << "] for:\n " << *DI << "\n");
+ LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump());
+ // Increase the SDNodeOrder for the DbgValue here to make sure it is
+ // inserted after the definition of Val when emitting the instructions
+ // after ISel. An alternative could be to teach
+ // ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly.
+ LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
+ << "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
+ << ValSDNodeOrder << "\n");
+ SDV = getDbgValue(Val, Variable, Expr, dl,
+ std::max(DbgSDNodeOrder, ValSDNodeOrder));
DAG.AddDbgValue(SDV, Val.getNode(), false);
- }
+ } else
+ LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
+ << "in EmitFuncArgumentDbgValue\n");
} else
- DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
- DanglingDebugInfoMap[V] = DanglingDebugInfo();
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
}
+ DDIV.clear();
}
/// getCopyFromRegs - If there was virtual register allocated for the value V
@@ -1315,12 +1367,18 @@ void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
+ bool IsSEH = isAsynchronousEHPersonality(Pers);
+ bool IsWasmCXX = Pers == EHPersonality::Wasm_CXX;
MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
+ if (!IsSEH)
+ CatchPadMBB->setIsEHScopeEntry();
// In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
if (IsMSVCCXX || IsCoreCLR)
CatchPadMBB->setIsEHFuncletEntry();
-
- DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, getControlRoot()));
+ // Wasm does not need catchpads anymore
+ if (!IsWasmCXX)
+ DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other,
+ getControlRoot()));
}
void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
@@ -1363,7 +1421,8 @@ void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
// Don't emit any special code for the cleanuppad instruction. It just marks
- // the start of a funclet.
+ // the start of an EH scope/funclet.
+ FuncInfo.MBB->setIsEHScopeEntry();
FuncInfo.MBB->setIsEHFuncletEntry();
FuncInfo.MBB->setIsCleanupFuncletEntry();
}
@@ -1385,6 +1444,7 @@ static void findUnwindDestinations(
classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
+ bool IsSEH = isAsynchronousEHPersonality(Personality);
while (EHPadBB) {
const Instruction *Pad = EHPadBB->getFirstNonPHI();
@@ -1397,6 +1457,7 @@ static void findUnwindDestinations(
// Stop on cleanup pads. Cleanups are always funclet entries for all known
// personalities.
UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+ UnwindDests.back().first->setIsEHScopeEntry();
UnwindDests.back().first->setIsEHFuncletEntry();
break;
} else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
@@ -1406,6 +1467,8 @@ static void findUnwindDestinations(
// For MSVC++ and the CLR, catchblocks are funclets and need prologues.
if (IsMSVCCXX || IsCoreCLR)
UnwindDests.back().first->setIsEHFuncletEntry();
+ if (!IsSEH)
+ UnwindDests.back().first->setIsEHScopeEntry();
}
NewEHPadBB = CatchSwitch->getUnwindDest();
} else {
@@ -1653,8 +1716,7 @@ SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
if (!BPI) {
// If BPI is not available, set the default probability as 1 / N, where N is
// the number of successors.
- auto SuccSize = std::max<uint32_t>(
- std::distance(succ_begin(SrcBB), succ_end(SrcBB)), 1);
+ auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
return BranchProbability(1, SuccSize);
}
return BPI->getEdgeProbability(SrcBB, DstBB);
@@ -2489,8 +2551,8 @@ void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
assert(CC.Low == CC.High && "Input clusters must be single-case");
#endif
- std::sort(Clusters.begin(), Clusters.end(),
- [](const CaseCluster &a, const CaseCluster &b) {
+ llvm::sort(Clusters.begin(), Clusters.end(),
+ [](const CaseCluster &a, const CaseCluster &b) {
return a.Low->getValue().slt(b.Low->getValue());
});
@@ -2551,9 +2613,23 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
}
void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
- if (DAG.getTarget().Options.TrapUnreachable)
- DAG.setRoot(
- DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
+ if (!DAG.getTarget().Options.TrapUnreachable)
+ return;
+
+ // We may be able to ignore unreachable behind a noreturn call.
+ if (DAG.getTarget().Options.NoTrapAfterNoreturn) {
+ const BasicBlock &BB = *I.getParent();
+ if (&I != &BB.front()) {
+ BasicBlock::const_iterator PredI =
+ std::prev(BasicBlock::const_iterator(&I));
+ if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
+ if (Call->doesNotReturn())
+ return;
+ }
+ }
+ }
+
+ DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
}
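Restating the new control flow as a standalone predicate may help review; this is a sketch with the instruction scan reduced to a boolean, not the patch's code:

// Should visitUnreachable emit an ISD::TRAP node?
bool shouldEmitTrap(bool TrapUnreachable, bool NoTrapAfterNoreturn,
                    bool PrecededByNoreturnCall) {
  if (!TrapUnreachable)
    return false;
  // A preceding noreturn call already guarantees we never fall through,
  // so a trap immediately after it would be dead weight.
  if (NoTrapAfterNoreturn && PrecededByNoreturnCall)
    return false;
  return true;
}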
void SelectionDAGBuilder::visitFSub(const User &I) {
@@ -2597,6 +2673,10 @@ static bool isVectorReductionOp(const User *I) {
}
unsigned ElemNum = Inst->getType()->getVectorNumElements();
+ // Ensure the reduction size is a power of 2.
+ if (!isPowerOf2_32(ElemNum))
+ return false;
+
unsigned ElemNumToReduce = ElemNum;
// Do DFS search on the def-use chain from the given instruction. We only
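The new guard uses llvm::isPowerOf2_32, the usual clear-lowest-bit test; an equivalent arithmetic sketch:

#include <cstdint>

// A nonzero value is a power of two iff clearing its lowest set bit
// leaves zero.
bool isPow2(uint32_t X) {
  return X != 0 && (X & (X - 1)) == 0;
}
// e.g. a 3-element vector (isPow2(3) == false) is now rejected as a
// reduction candidate, while 4- and 8-element vectors still qualify.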
@@ -2682,7 +2762,7 @@ static bool isVectorReductionOp(const User *I) {
return false;
const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1));
- if (!Val || Val->getZExtValue() != 0)
+ if (!Val || !Val->isZero())
return false;
ReduxExtracted = true;
@@ -2693,45 +2773,23 @@ static bool isVectorReductionOp(const User *I) {
return ReduxExtracted;
}
-void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
- SDValue Op1 = getValue(I.getOperand(0));
- SDValue Op2 = getValue(I.getOperand(1));
-
- bool nuw = false;
- bool nsw = false;
- bool exact = false;
- bool vec_redux = false;
- FastMathFlags FMF;
-
- if (const OverflowingBinaryOperator *OFBinOp =
- dyn_cast<const OverflowingBinaryOperator>(&I)) {
- nuw = OFBinOp->hasNoUnsignedWrap();
- nsw = OFBinOp->hasNoSignedWrap();
+void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
+ SDNodeFlags Flags;
+ if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) {
+ Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap());
+ Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap());
+ }
+ if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) {
+ Flags.setExact(ExactOp->isExact());
}
- if (const PossiblyExactOperator *ExactOp =
- dyn_cast<const PossiblyExactOperator>(&I))
- exact = ExactOp->isExact();
- if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I))
- FMF = FPOp->getFastMathFlags();
-
if (isVectorReductionOp(&I)) {
- vec_redux = true;
- DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
+ Flags.setVectorReduction(true);
+ LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
}
- SDNodeFlags Flags;
- Flags.setExact(exact);
- Flags.setNoSignedWrap(nsw);
- Flags.setNoUnsignedWrap(nuw);
- Flags.setVectorReduction(vec_redux);
- Flags.setAllowReciprocal(FMF.allowReciprocal());
- Flags.setAllowContract(FMF.allowContract());
- Flags.setNoInfs(FMF.noInfs());
- Flags.setNoNaNs(FMF.noNaNs());
- Flags.setNoSignedZeros(FMF.noSignedZeros());
- Flags.setUnsafeAlgebra(FMF.isFast());
-
- SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ SDValue BinNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(),
Op1, Op2, Flags);
setValue(&I, BinNodeValue);
}
@@ -2823,13 +2881,12 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
predicate = FCmpInst::Predicate(FC->getPredicate());
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
- ISD::CondCode Condition = getFCmpCondCode(predicate);
- // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them.
- // FIXME: We should propagate the fast-math-flags to the DAG node itself for
- // further optimization, but currently FMF is only applicable to binary nodes.
- if (TM.Options.NoNaNsFPMath)
+ ISD::CondCode Condition = getFCmpCondCode(predicate);
+ auto *FPMO = dyn_cast<FPMathOperator>(&I);
+ if ((FPMO && FPMO->hasNoNaNs()) || TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
+
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
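A toy model of what getFCmpCodeWithoutNaN does here (the enumerators below are illustrative copies, not the real ISD::CondCode values): once NaNs are excluded, an ordered predicate and its unordered twin are the same test, so the cheaper form can be used.

enum ToyCond { SETOLT, SETULT, SETLT, SETOGT, SETUGT, SETGT };

ToyCond withoutNaN(ToyCond C) {
  switch (C) {
  case SETOLT: case SETULT: return SETLT; // NaN operands are impossible,
  case SETOGT: case SETUGT: return SETGT; // so ordered == unordered
  default:                  return C;
  }
}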
@@ -3424,10 +3481,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
DAG.getConstant(Offset, dl, N.getValueType()), Flags);
}
} else {
- MVT PtrTy =
- DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS);
- unsigned PtrSize = PtrTy.getSizeInBits();
- APInt ElementSize(PtrSize, DL->getTypeAllocSize(GTI.getIndexedType()));
+ unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
+ MVT IdxTy = MVT::getIntegerVT(IdxSize);
+ APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType()));
// If this is a scalar constant or a splat vector of constants,
// handle it quickly.
@@ -3439,11 +3495,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (CI) {
if (CI->isZero())
continue;
- APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
+ APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize);
LLVMContext &Context = *DAG.getContext();
SDValue OffsVal = VectorWidth ?
- DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, PtrTy, VectorWidth)) :
- DAG.getConstant(Offs, dl, PtrTy);
+ DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) :
+ DAG.getConstant(Offs, dl, IdxTy);
      // In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
@@ -3867,7 +3923,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
// extract the splat value and use it as a uniform base.
// In all other cases the function returns 'false'.
static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
- SelectionDAGBuilder* SDB) {
+ SDValue &Scale, SelectionDAGBuilder* SDB) {
SelectionDAG& DAG = SDB->DAG;
LLVMContext &Context = *DAG.getContext();
@@ -3897,6 +3953,10 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
return false;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ Scale = DAG.getTargetConstant(DL.getTypeAllocSize(GEP->getResultElementType()),
+ SDB->getCurSDLoc(), TLI.getPointerTy(DL));
Base = SDB->getValue(Ptr);
Index = SDB->getValue(IndexVal);
@@ -3926,8 +3986,9 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDValue Base;
SDValue Index;
+ SDValue Scale;
const Value *BasePtr = Ptr;
- bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
+ bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
MachineMemOperand *MMO = DAG.getMachineFunction().
@@ -3935,10 +3996,11 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
MachineMemOperand::MOStore, VT.getStoreSize(),
Alignment, AAInfo);
if (!UniformBase) {
- Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
+ Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
+ Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
- SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index };
+ SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale };
SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
Ops, MMO);
DAG.setRoot(Scatter);
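The new Scale operand completes a base-plus-scaled-index addressing form for gather/scatter. A scalar model of the address each lane computes, under the two cases above (uniform base: Scale is the element allocation size; otherwise Base becomes 0, Scale becomes 1, and Index already carries whole pointers):

#include <cstdint>

// Per-lane effective address implied by the new operand list.
uint64_t laneAddress(uint64_t Base, int64_t Index, uint64_t Scale) {
  return Base + static_cast<uint64_t>(Index) * Scale;
}
// Uniform base over i32 elements: laneAddress(P, i, 4) == P + 4*i.
// Non-uniform base:               laneAddress(0, ptr_i, 1) == ptr_i.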
@@ -3997,10 +4059,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
ISD::NON_EXTLOAD, IsExpanding);
- if (AddToChain) {
- SDValue OutChain = Load.getValue(1);
- DAG.setRoot(OutChain);
- }
+ if (AddToChain)
+ PendingLoads.push_back(Load.getValue(1));
setValue(&I, Load);
}
@@ -4025,8 +4085,9 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
SDValue Root = DAG.getRoot();
SDValue Base;
SDValue Index;
+ SDValue Scale;
const Value *BasePtr = Ptr;
- bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
+ bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
bool ConstantMemory = false;
if (UniformBase &&
AA && AA->pointsToConstantMemory(MemoryLocation(
@@ -4044,10 +4105,11 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
Alignment, AAInfo, Ranges);
if (!UniformBase) {
- Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
+ Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
+ Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
- SDValue Ops[] = { Root, Src0, Mask, Base, Index };
+ SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
Ops, MMO);
@@ -4868,26 +4930,18 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
const auto &TLI = DAG.getTargetLoweringInfo();
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
V->getType(), isABIRegCopy(V));
- unsigned NumRegs =
- std::accumulate(RFV.RegCount.begin(), RFV.RegCount.end(), 0);
- if (NumRegs > 1) {
- unsigned I = 0;
+ if (RFV.occupiesMultipleRegs()) {
unsigned Offset = 0;
- auto RegisterVT = RFV.RegVTs.begin();
- for (auto RegCount : RFV.RegCount) {
- unsigned RegisterSize = (RegisterVT++)->getSizeInBits();
- for (unsigned E = I + RegCount; I != E; ++I) {
- // The vregs are guaranteed to be allocated in sequence.
- Op = MachineOperand::CreateReg(VMI->second + I, false);
- auto FragmentExpr = DIExpression::createFragmentExpression(
- Expr, Offset, RegisterSize);
- if (!FragmentExpr)
- continue;
- FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
- Op->getReg(), Variable, *FragmentExpr));
- Offset += RegisterSize;
- }
+ for (auto RegAndSize : RFV.getRegsAndSizes()) {
+ Op = MachineOperand::CreateReg(RegAndSize.first, false);
+ auto FragmentExpr = DIExpression::createFragmentExpression(
+ Expr, Offset, RegAndSize.second);
+ if (!FragmentExpr)
+ continue;
+ FuncInfo.ArgDbgValues.push_back(
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
+ Op->getReg(), Variable, *FragmentExpr));
+ Offset += RegAndSize.second;
}
return true;
}
@@ -4901,17 +4955,10 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
- if (Op->isReg())
- FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
- Op->getReg(), Variable, Expr));
- else
- FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE))
- .add(*Op)
- .addImm(0)
- .addMetadata(Variable)
- .addMetadata(Expr));
+ IsIndirect = (Op->isReg()) ? IsIndirect : true;
+ FuncInfo.ArgDbgValues.push_back(
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
+ *Op, Variable, Expr));
return true;
}
@@ -4924,13 +4971,20 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
unsigned DbgSDNodeOrder) {
if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
// Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
- // stack slot locations as such instead of as indirectly addressed
- // locations.
- return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), dl,
- DbgSDNodeOrder);
+ // stack slot locations.
+ //
+ // Consider "int x = 0; int *px = &x;". There are two kinds of interesting
+ // debug values here after optimization:
+ //
+ // dbg.value(i32* %px, !"int *px", !DIExpression()), and
+ // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
+ //
+ // Both describe the direct values of their associated variables.
+ return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(),
+ /*IsIndirect*/ false, dl, DbgSDNodeOrder);
}
- return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, dl,
- DbgSDNodeOrder);
+ return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(),
+ /*IsIndirect*/ false, dl, DbgSDNodeOrder);
}
// VisualStudio defines setjmp as _setjmp
@@ -5000,14 +5054,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::longjmp:
return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
case Intrinsic::memcpy: {
+ const auto &MCI = cast<MemCpyInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
- unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
- if (!Align)
- Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment.
- bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ // @llvm.memcpy defines 0 and 1 to both mean no alignment.
+ unsigned DstAlign = std::max<unsigned>(MCI.getDestAlignment(), 1);
+ unsigned SrcAlign = std::max<unsigned>(MCI.getSourceAlignment(), 1);
+ unsigned Align = MinAlign(DstAlign, SrcAlign);
+ bool isVol = MCI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ // FIXME: Support passing different dest/src alignments to the memcpy DAG
+ // node.
SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
false, isTC,
MachinePointerInfo(I.getArgOperand(0)),
@@ -5016,13 +5074,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
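Until the memcpy DAG node learns separate source/destination alignments (the FIXME above), the lowering conservatively takes their common alignment. llvm::MinAlign returns the largest power of two dividing both; a sketch with the same behavior:

#include <cstdint>

// Lowest set bit of (A | B): the greatest power of two dividing both
// alignments, assuming power-of-two inputs.
uint64_t minAlign(uint64_t A, uint64_t B) {
  return (A | B) & (1 + ~(A | B));
}
// e.g. dst align 16, src align 4 => the DAG memcpy is emitted with align 4.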
case Intrinsic::memset: {
+ const auto &MSI = cast<MemSetInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
- unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
- if (!Align)
- Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment.
- bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ // @llvm.memset defines 0 and 1 to both mean no alignment.
+ unsigned Align = std::max<unsigned>(MSI.getDestAlignment(), 1);
+ bool isVol = MSI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)));
@@ -5030,14 +5088,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::memmove: {
+ const auto &MMI = cast<MemMoveInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
- unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
- if (!Align)
- Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment.
- bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ // @llvm.memmove defines 0 and 1 to both mean no alignment.
+ unsigned DstAlign = std::max<unsigned>(MMI.getDestAlignment(), 1);
+ unsigned SrcAlign = std::max<unsigned>(MMI.getSourceAlignment(), 1);
+ unsigned Align = MinAlign(DstAlign, SrcAlign);
+ bool isVol = MMI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ // FIXME: Support passing different dest/src alignments to the memmove DAG
+ // node.
SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
@@ -5050,36 +5112,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Src = getValue(MI.getRawSource());
SDValue Length = getValue(MI.getLength());
- // Emit a library call.
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
- Entry.Node = Dst;
- Args.push_back(Entry);
-
- Entry.Node = Src;
- Args.push_back(Entry);
-
- Entry.Ty = MI.getLength()->getType();
- Entry.Node = Length;
- Args.push_back(Entry);
-
- uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
- RTLIB::Libcall LibraryCall =
- RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
- if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
- report_fatal_error("Unsupported element size");
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
- TLI.getLibcallCallingConv(LibraryCall),
- Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args));
-
- std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
- DAG.setRoot(CallResult.second);
+ unsigned DstAlign = MI.getDestAlignment();
+ unsigned SrcAlign = MI.getSourceAlignment();
+ Type *LengthTy = MI.getLength()->getType();
+ unsigned ElemSz = MI.getElementSizeInBytes();
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, DstAlign, Src,
+ SrcAlign, Length, LengthTy, ElemSz, isTC,
+ MachinePointerInfo(MI.getRawDest()),
+ MachinePointerInfo(MI.getRawSource()));
+ updateDAGForMaybeTailCall(MC);
return nullptr;
}
case Intrinsic::memmove_element_unordered_atomic: {
@@ -5088,36 +5130,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Src = getValue(MI.getRawSource());
SDValue Length = getValue(MI.getLength());
- // Emit a library call.
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
- Entry.Node = Dst;
- Args.push_back(Entry);
-
- Entry.Node = Src;
- Args.push_back(Entry);
-
- Entry.Ty = MI.getLength()->getType();
- Entry.Node = Length;
- Args.push_back(Entry);
-
- uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
- RTLIB::Libcall LibraryCall =
- RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
- if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
- report_fatal_error("Unsupported element size");
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
- TLI.getLibcallCallingConv(LibraryCall),
- Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args));
-
- std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
- DAG.setRoot(CallResult.second);
+ unsigned DstAlign = MI.getDestAlignment();
+ unsigned SrcAlign = MI.getSourceAlignment();
+ Type *LengthTy = MI.getLength()->getType();
+ unsigned ElemSz = MI.getElementSizeInBytes();
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, DstAlign, Src,
+ SrcAlign, Length, LengthTy, ElemSz, isTC,
+ MachinePointerInfo(MI.getRawDest()),
+ MachinePointerInfo(MI.getRawSource()));
+ updateDAGForMaybeTailCall(MC);
return nullptr;
}
case Intrinsic::memset_element_unordered_atomic: {
@@ -5126,37 +5148,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Val = getValue(MI.getValue());
SDValue Length = getValue(MI.getLength());
- // Emit a library call.
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
- Entry.Node = Dst;
- Args.push_back(Entry);
-
- Entry.Ty = Type::getInt8Ty(*DAG.getContext());
- Entry.Node = Val;
- Args.push_back(Entry);
-
- Entry.Ty = MI.getLength()->getType();
- Entry.Node = Length;
- Args.push_back(Entry);
-
- uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
- RTLIB::Libcall LibraryCall =
- RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
- if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
- report_fatal_error("Unsupported element size");
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
- TLI.getLibcallCallingConv(LibraryCall),
- Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args));
-
- std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
- DAG.setRoot(CallResult.second);
+ unsigned DstAlign = MI.getDestAlignment();
+ Type *LengthTy = MI.getLength()->getType();
+ unsigned ElemSz = MI.getElementSizeInBytes();
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, DstAlign, Val, Length,
+ LengthTy, ElemSz, isTC,
+ MachinePointerInfo(MI.getRawDest()));
+ updateDAGForMaybeTailCall(MC);
return nullptr;
}
case Intrinsic::dbg_addr:
@@ -5164,13 +5163,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
const DbgInfoIntrinsic &DI = cast<DbgInfoIntrinsic>(I);
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
+ dropDanglingDebugInfo(Variable, Expression);
assert(Variable && "Missing variable");
// Check if address has undef value.
const Value *Address = DI.getVariableLocation();
if (!Address || isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
- DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
return nullptr;
}
@@ -5195,10 +5195,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in
// the MachineFunction variable table.
if (FI != std::numeric_limits<int>::max()) {
- if (Intrinsic == Intrinsic::dbg_addr)
- DAG.AddDbgValue(DAG.getFrameIndexDbgValue(Variable, Expression, FI, dl,
- SDNodeOrder),
- getRoot().getNode(), isParameter);
+ if (Intrinsic == Intrinsic::dbg_addr) {
+ SDDbgValue *SDV = DAG.getFrameIndexDbgValue(
+ Variable, Expression, FI, /*IsIndirect*/ true, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, getRoot().getNode(), isParameter);
+ }
return nullptr;
}
@@ -5214,8 +5215,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
if (isParameter && FINode) {
// Byval parameter. We have a frame index at this point.
- SDV = DAG.getFrameIndexDbgValue(Variable, Expression,
- FINode->getIndex(), dl, SDNodeOrder);
+ SDV =
+ DAG.getFrameIndexDbgValue(Variable, Expression, FINode->getIndex(),
+ /*IsIndirect*/ true, dl, SDNodeOrder);
} else if (isa<Argument>(Address)) {
// Address is an argument, so try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
@@ -5231,17 +5233,28 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// virtual register info from the FuncInfo.ValueMap.
if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true,
N)) {
- DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
}
}
return nullptr;
}
+ case Intrinsic::dbg_label: {
+ const DbgLabelInst &DI = cast<DbgLabelInst>(I);
+ DILabel *Label = DI.getLabel();
+ assert(Label && "Missing label");
+
+ SDDbgLabel *SDV;
+ SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder);
+ DAG.AddDbgLabel(SDV);
+ return nullptr;
+ }
case Intrinsic::dbg_value: {
const DbgValueInst &DI = cast<DbgValueInst>(I);
assert(DI.getVariable() && "Missing variable");
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
+ dropDanglingDebugInfo(Variable, Expression);
const Value *V = DI.getValue();
if (!V)
return nullptr;
@@ -5266,16 +5279,64 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
+    // PHI nodes have already been selected, so we should know which VReg the
+    // value is assigned to already.
+ if (isa<PHINode>(V)) {
+ auto VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end()) {
+ unsigned Reg = VMI->second;
+ // The PHI node may be split up into several MI PHI nodes (in
+ // FunctionLoweringInfo::set).
+ RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
+ V->getType(), false);
+ if (RFV.occupiesMultipleRegs()) {
+ unsigned Offset = 0;
+ unsigned BitsToDescribe = 0;
+ if (auto VarSize = Variable->getSizeInBits())
+ BitsToDescribe = *VarSize;
+ if (auto Fragment = Expression->getFragmentInfo())
+ BitsToDescribe = Fragment->SizeInBits;
+ for (auto RegAndSize : RFV.getRegsAndSizes()) {
+ unsigned RegisterSize = RegAndSize.second;
+ // Bail out if all bits are described already.
+ if (Offset >= BitsToDescribe)
+ break;
+ unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
+ ? BitsToDescribe - Offset
+ : RegisterSize;
+ auto FragmentExpr = DIExpression::createFragmentExpression(
+ Expression, Offset, FragmentSize);
+ if (!FragmentExpr)
+ continue;
+ SDV = DAG.getVRegDbgValue(Variable, *FragmentExpr, RegAndSize.first,
+ false, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
+ Offset += RegisterSize;
+ }
+ } else {
+ SDV = DAG.getVRegDbgValue(Variable, Expression, Reg, false, dl,
+ SDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
+ }
+ return nullptr;
+ }
+ }
+
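The fragment loop above slices however many bits the variable (or its pre-existing fragment) covers across the registers backing the PHI, clipping the final slice. A compact sketch of the arithmetic, assuming equally sized registers for brevity:

#include <algorithm>
#include <cstdio>

// Describe BitsToDescribe bits of a variable using RegSize-bit registers;
// the last fragment is clipped so no bits past the end are described.
void printFragments(unsigned BitsToDescribe, unsigned RegSize) {
  for (unsigned Offset = 0; Offset < BitsToDescribe; Offset += RegSize) {
    unsigned FragmentSize = std::min(RegSize, BitsToDescribe - Offset);
    std::printf("fragment at bit %u, size %u\n", Offset, FragmentSize);
  }
}
// e.g. printFragments(96, 32) yields fragments [0,32), [32,64), [64,96).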
+ // TODO: When we get here we will either drop the dbg.value completely, or
+    // we try to move it forward by letting it dangle for a while. So we should
+ // probably add an extra DbgValue to the DAG here, with a reference to
+ // "noreg", to indicate that we have lost the debug location for the
+ // variable.
+
if (!V->use_empty() ) {
// Do not call getValue(V) yet, as we don't want to generate code.
// Remember it for later.
- DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
- DanglingDebugInfoMap[V] = DDI;
+ DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder);
return nullptr;
}
- DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n");
- DEBUG(dbgs() << " Last seen at:\n " << *V << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n");
+ LLVM_DEBUG(dbgs() << " Last seen at:\n " << *V << "\n");
return nullptr;
}
@@ -5609,6 +5670,52 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg));
return nullptr;
}
+ case Intrinsic::fshl:
+ case Intrinsic::fshr: {
+ bool IsFSHL = Intrinsic == Intrinsic::fshl;
+ SDValue X = getValue(I.getArgOperand(0));
+ SDValue Y = getValue(I.getArgOperand(1));
+ SDValue Z = getValue(I.getArgOperand(2));
+ EVT VT = X.getValueType();
+
+ // When X == Y, this is rotate. Create the node directly if legal.
+ // TODO: This should also be done if the operation is custom, but we have
+ // to make sure targets are handling the modulo shift amount as expected.
+ // TODO: If the rotate direction (left or right) corresponding to the shift
+ // is not available, adjust the shift value and invert the direction.
+ auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
+ if (X == Y && TLI.isOperationLegal(RotateOpcode, VT)) {
+ setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
+ return nullptr;
+ }
+
+ // Get the shift amount and inverse shift amount, modulo the bit-width.
+ SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT);
+ SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC);
+ SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, Z);
+ SDValue InvShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC);
+
+ // fshl: (X << (Z % BW)) | (Y >> ((BW - Z) % BW))
+ // fshr: (X << ((BW - Z) % BW)) | (Y >> (Z % BW))
+ SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : InvShAmt);
+ SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, Y, IsFSHL ? InvShAmt : ShAmt);
+ SDValue Res = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY);
+
+ // If (Z % BW == 0), then (BW - Z) % BW is also zero, so the result would
+ // be X | Y. If X == Y (rotate), that's fine. If not, we have to select.
+ if (X != Y) {
+ SDValue Zero = DAG.getConstant(0, sdl, VT);
+ EVT CCVT = MVT::i1;
+ if (VT.isVector())
+ CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements());
+ // For fshl, 0 shift returns the 1st arg (X).
+ // For fshr, 0 shift returns the 2nd arg (Y).
+ SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ);
+ Res = DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Res);
+ }
+ setValue(&I, Res);
+ return nullptr;
+ }
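A scalar model of the generic expansion, specialized to 8 bits (this mirrors the formulas in the comments above; the early return plays the role of the guarding select):

#include <cstdint>

// fshl(X, Y, Z): shift the concatenation X:Y left by Z mod 8, keep the high
// byte. When Z mod 8 == 0, both (Z % 8) and ((8 - Z) % 8) are zero, so the
// OR form would wrongly yield X | Y; return X directly instead.
uint8_t fshl8(uint8_t X, uint8_t Y, uint8_t Z) {
  unsigned ShAmt = Z % 8;
  if (ShAmt == 0)
    return X;
  return static_cast<uint8_t>((X << ShAmt) | (Y >> (8 - ShAmt)));
}
// fshr is the mirror image: it returns Y for a zero shift and computes
// (X << (8 - ShAmt)) | (Y >> ShAmt) otherwise.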
case Intrinsic::stacksave: {
SDValue Op = getRoot();
Res = DAG.getNode(
@@ -5703,7 +5810,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::annotation:
case Intrinsic::ptr_annotation:
- case Intrinsic::invariant_group_barrier:
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group:
// Drop the intrinsic, but forward the value
setValue(&I, getValue(I.getOperand(0)));
return nullptr;
@@ -5822,17 +5930,23 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Ops[5];
unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
auto Flags = rw == 0 ? MachineMemOperand::MOLoad :MachineMemOperand::MOStore;
- Ops[0] = getRoot();
+ Ops[0] = DAG.getRoot();
Ops[1] = getValue(I.getArgOperand(0));
Ops[2] = getValue(I.getArgOperand(1));
Ops[3] = getValue(I.getArgOperand(2));
Ops[4] = getValue(I.getArgOperand(3));
- DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl,
- DAG.getVTList(MVT::Other), Ops,
- EVT::getIntegerVT(*Context, 8),
- MachinePointerInfo(I.getArgOperand(0)),
- 0, /* align */
- Flags));
+ SDValue Result = DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl,
+ DAG.getVTList(MVT::Other), Ops,
+ EVT::getIntegerVT(*Context, 8),
+ MachinePointerInfo(I.getArgOperand(0)),
+ 0, /* align */
+ Flags);
+
+    // Chain the prefetch in parallel with any pending loads, to stay out of
+ // the way of later optimizations.
+ PendingLoads.push_back(Result);
+ Result = getRoot();
+ DAG.setRoot(Result);
return nullptr;
}
case Intrinsic::lifetime_start:
@@ -6004,6 +6118,41 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, patchableNode);
return nullptr;
}
+ case Intrinsic::xray_typedevent: {
+    // Here we want to make sure that the intrinsic behaves as if it has a
+    // specific calling convention; currently this is only done for x86_64.
+ // FIXME: Support other platforms later.
+ const auto &Triple = DAG.getTarget().getTargetTriple();
+ if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+ return nullptr;
+
+ SDLoc DL = getCurSDLoc();
+ SmallVector<SDValue, 8> Ops;
+
+ // We want to say that we always want the arguments in registers.
+ // It's unclear to me how manipulating the selection DAG here forces callers
+ // to provide arguments in registers instead of on the stack.
+ SDValue LogTypeId = getValue(I.getArgOperand(0));
+ SDValue LogEntryVal = getValue(I.getArgOperand(1));
+ SDValue StrSizeVal = getValue(I.getArgOperand(2));
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Chain = getRoot();
+ Ops.push_back(LogTypeId);
+ Ops.push_back(LogEntryVal);
+ Ops.push_back(StrSizeVal);
+ Ops.push_back(Chain);
+
+    // We need to enforce the calling convention for the callsite, so that
+    // argument ordering is enforced correctly, and so that register allocation
+    // can see that some registers may be assumed clobbered and need to be
+    // preserved across calls to the intrinsic.
+ MachineSDNode *MN = DAG.getMachineNode(
+ TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, DL, NodeTys, Ops);
+ SDValue patchableNode = SDValue(MN, 0);
+ DAG.setRoot(patchableNode);
+ setValue(&I, patchableNode);
+ return nullptr;
+ }
case Intrinsic::experimental_deoptimize:
LowerDeoptimizeCall(&I);
return nullptr;
@@ -6023,6 +6172,66 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::experimental_vector_reduce_fmin:
visitVectorReduce(I, Intrinsic);
return nullptr;
+
+ case Intrinsic::icall_branch_funnel: {
+ SmallVector<SDValue, 16> Ops;
+ Ops.push_back(DAG.getRoot());
+ Ops.push_back(getValue(I.getArgOperand(0)));
+
+ int64_t Offset;
+ auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
+ I.getArgOperand(1), Offset, DAG.getDataLayout()));
+ if (!Base)
+ report_fatal_error(
+ "llvm.icall.branch.funnel operand must be a GlobalValue");
+ Ops.push_back(DAG.getTargetGlobalAddress(Base, getCurSDLoc(), MVT::i64, 0));
+
+ struct BranchFunnelTarget {
+ int64_t Offset;
+ SDValue Target;
+ };
+ SmallVector<BranchFunnelTarget, 8> Targets;
+
+ for (unsigned Op = 1, N = I.getNumArgOperands(); Op != N; Op += 2) {
+ auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
+ I.getArgOperand(Op), Offset, DAG.getDataLayout()));
+ if (ElemBase != Base)
+ report_fatal_error("all llvm.icall.branch.funnel operands must refer "
+ "to the same GlobalValue");
+
+ SDValue Val = getValue(I.getArgOperand(Op + 1));
+ auto *GA = dyn_cast<GlobalAddressSDNode>(Val);
+ if (!GA)
+ report_fatal_error(
+ "llvm.icall.branch.funnel operand must be a GlobalValue");
+ Targets.push_back({Offset, DAG.getTargetGlobalAddress(
+ GA->getGlobal(), getCurSDLoc(),
+ Val.getValueType(), GA->getOffset())});
+ }
+ llvm::sort(Targets.begin(), Targets.end(),
+ [](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) {
+ return T1.Offset < T2.Offset;
+ });
+
+ for (auto &T : Targets) {
+ Ops.push_back(DAG.getTargetConstant(T.Offset, getCurSDLoc(), MVT::i32));
+ Ops.push_back(T.Target);
+ }
+
+ SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL,
+ getCurSDLoc(), MVT::Other, Ops),
+ 0);
+ DAG.setRoot(N);
+ setValue(&I, N);
+ HasTailCall = true;
+ return nullptr;
+ }
+
+ case Intrinsic::wasm_landingpad_index: {
+ // TODO store landing pad index in a map, which will be used when generating
+ // LSDA information
+ return nullptr;
+ }
}
}
@@ -6172,7 +6381,10 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel));
// Inform MachineModuleInfo of range.
- if (MF.hasEHFunclets()) {
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+  // There is a platform (e.g. wasm) that uses funclet-style IR but does not
+ // actually use outlined funclets and their LSDA info style.
+ if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) {
assert(CLI.CS);
WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()),
@@ -6630,14 +6842,13 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
const char *RenameFn = nullptr;
if (Function *F = I.getCalledFunction()) {
if (F->isDeclaration()) {
- if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) {
- if (unsigned IID = II->getIntrinsicID(F)) {
- RenameFn = visitIntrinsicCall(I, IID);
- if (!RenameFn)
- return;
- }
- }
- if (Intrinsic::ID IID = F->getIntrinsicID()) {
+ // Is this an LLVM intrinsic or a target-specific intrinsic?
+ unsigned IID = F->getIntrinsicID();
+ if (!IID)
+ if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo())
+ IID = II->getIntrinsicID(F);
+
+ if (IID) {
RenameFn = visitIntrinsicCall(I, IID);
if (!RenameFn)
return;
@@ -6989,27 +7200,37 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
unsigned NumRegs = 1;
if (OpInfo.ConstraintVT != MVT::Other) {
- // If this is a FP input in an integer register (or visa versa) insert a bit
- // cast of the input value. More generally, handle any case where the input
- // value disagrees with the register class we plan to stick this in.
- if (OpInfo.Type == InlineAsm::isInput && PhysReg.second &&
+  // If this is an FP operand in an integer register (or vice versa), or more
+ // generally if the operand value disagrees with the register class we plan
+ // to stick it in, fix the operand type.
+ //
+ // If this is an input value, the bitcast to the new type is done now.
+ // Bitcast for output value is done at the end of visitInlineAsm().
+ if ((OpInfo.Type == InlineAsm::isOutput ||
+ OpInfo.Type == InlineAsm::isInput) &&
+ PhysReg.second &&
!TRI.isTypeLegalForClass(*PhysReg.second, OpInfo.ConstraintVT)) {
// Try to convert to the first EVT that the reg class contains. If the
// types are identical size, use a bitcast to convert (e.g. two differing
- // vector types).
+ // vector types). Note: output bitcast is done at the end of
+ // visitInlineAsm().
MVT RegVT = *TRI.legalclasstypes_begin(*PhysReg.second);
- if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) {
- OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
- RegVT, OpInfo.CallOperand);
+ if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+ // Exclude indirect inputs while they are unsupported because the code
+ // to perform the load is missing and thus OpInfo.CallOperand still
+          // refers to the input address rather than the pointed-to value.
+ if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect)
+ OpInfo.CallOperand =
+ DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
+ // If the operand is a FP value and we want it in integer registers,
+ // use the corresponding integer type. This turns an f64 value into
+ // i64, which can be passed with two i32 values on a 32-bit machine.
} else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
- // If the input is a FP value and we want it in FP registers, do a
- // bitcast to the corresponding integer type. This turns an f64 value
- // into i64, which can be passed with two i32 values on a 32-bit
- // machine.
RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
- OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
- RegVT, OpInfo.CallOperand);
+ if (OpInfo.Type == InlineAsm::isInput)
+ OpInfo.CallOperand =
+ DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
}
}
@@ -7246,7 +7467,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
continue;
// If this is a memory input, and if the operand is not indirect, do what we
- // need to to provide an address for the memory input.
+ // need to provide an address for the memory input.
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
!OpInfo.isIndirect) {
assert((OpInfo.isMultipleAlternative ||
@@ -7521,12 +7742,18 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
EVT ResultType = TLI.getValueType(DAG.getDataLayout(), CS.getType());
- // If any of the results of the inline asm is a vector, it may have the
- // wrong width/num elts. This can happen for register classes that can
- // contain multiple different value types. The preg or vreg allocated may
- // not have the same VT as was expected. Convert it to the right type
- // with bit_convert.
- if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
+    // If the type of the inline asm call site return value differs from the
+    // type of the asm output but has the same size, bitcast it. One example
+ // of this is for vectors with different width / number of elements.
+ // This can happen for register classes that can contain multiple
+ // different value types. The preg or vreg allocated may not have the
+ // same VT as was expected.
+ //
+ // This can also happen for a return value that disagrees with the
+    // register class it is put in, e.g. a double in a general-purpose
+ // register on a 32-bit machine.
+ if (ResultType != Val.getValueType() &&
+ ResultType.getSizeInBits() == Val.getValueSizeInBits()) {
Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(),
ResultType, Val);
@@ -7581,8 +7808,17 @@ void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS,
// Make sure we leave the DAG in a valid state
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- auto VT = TLI.getValueType(DAG.getDataLayout(), CS.getType());
- setValue(CS.getInstruction(), DAG.getUNDEF(VT));
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs);
+
+ if (ValueVTs.empty())
+ return;
+
+ SmallVector<SDValue, 1> Ops;
+ for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i)
+ Ops.push_back(DAG.getUNDEF(ValueVTs[i]));
+
+ setValue(CS.getInstruction(), DAG.getMergeValues(Ops, getCurSDLoc()));
}
void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
@@ -7656,7 +7892,7 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
return DAG.getMergeValues(Ops, SL);
}
-/// \brief Populate a CallLowerinInfo (into \p CLI) based on the properties of
+/// Populate a CallLoweringInfo (into \p CLI) based on the properties of
/// the call being lowered.
///
/// This is a helper for lowering intrinsics that follow a target calling
@@ -7680,7 +7916,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
TargetLowering::ArgListEntry Entry;
Entry.Node = getValue(V);
Entry.Ty = V->getType();
- Entry.setAttributes(&CS, ArgIdx);
+ Entry.setAttributes(&CS, ArgI);
Args.push_back(Entry);
}
@@ -7691,7 +7927,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
.setIsPatchPoint(IsPatchPoint);
}
-/// \brief Add a stack map intrinsic call's live variable operands to a stackmap
+/// Add a stack map intrinsic call's live variable operands to a stackmap
/// or patchpoint target node's operand list.
///
/// Constants are converted to TargetConstants purely as an optimization to
@@ -7727,7 +7963,7 @@ static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
}
}
-/// \brief Lower llvm.experimental.stackmap directly to its target opcode.
+/// Lower llvm.experimental.stackmap directly to its target opcode.
void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
// void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>,
// [live variables...])
@@ -7790,7 +8026,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
FuncInfo.MF->getFrameInfo().setHasStackMap();
}
-/// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
+/// Lower llvm.experimental.patchpoint directly to its target opcode.
void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
const BasicBlock *EHPadBB) {
// void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
@@ -7954,8 +8190,6 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
FastMathFlags FMF;
if (isa<FPMathOperator>(I))
FMF = I.getFastMathFlags();
- SDNodeFlags SDFlags;
- SDFlags.setNoNaNs(FMF.noNaNs());
switch (Intrinsic) {
case Intrinsic::experimental_vector_reduce_fadd:
@@ -7998,10 +8232,10 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
break;
case Intrinsic::experimental_vector_reduce_fmax:
- Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags);
+ Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1);
break;
case Intrinsic::experimental_vector_reduce_fmin:
- Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
+ Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1);
break;
default:
llvm_unreachable("Unhandled vector reduce intrinsic");
@@ -8220,8 +8454,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
else if (Args[i].IsZExt)
ExtendKind = ISD::ZERO_EXTEND;
- // Conservatively only handle 'returned' on non-vectors for now
- if (Args[i].IsReturned && !Op.getValueType().isVector()) {
+ // Conservatively only handle 'returned' on non-vectors that can be lowered,
+ // for now.
+ if (Args[i].IsReturned && !Op.getValueType().isVector() &&
+ CanLowerReturn) {
assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues &&
"unexpected use of 'returned'");
// Before passing 'returned' to the target lowering code, ensure that
@@ -8500,7 +8736,8 @@ findArgumentCopyElisionCandidates(const DataLayout &DL,
continue;
}
- DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI << '\n');
+ LLVM_DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI
+ << '\n');
// Mark this alloca and store for argument copy elision.
*Info = StaticAllocaInfo::Elidable;
@@ -8541,8 +8778,9 @@ static void tryToElideArgumentCopy(
int OldIndex = AllocaIndex;
MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo();
if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
- DEBUG(dbgs() << " argument copy elision failed due to bad fixed stack "
- "object size\n");
+ LLVM_DEBUG(
+ dbgs() << " argument copy elision failed due to bad fixed stack "
+ "object size\n");
return;
}
unsigned RequiredAlignment = AI->getAlignment();
@@ -8551,16 +8789,16 @@ static void tryToElideArgumentCopy(
AI->getAllocatedType());
}
if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) {
- DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
- "greater than stack argument alignment ("
- << RequiredAlignment << " vs "
- << MFI.getObjectAlignment(FixedIndex) << ")\n");
+ LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
+ "greater than stack argument alignment ("
+ << RequiredAlignment << " vs "
+ << MFI.getObjectAlignment(FixedIndex) << ")\n");
return;
}
// Perform the elision. Delete the old stack object and replace its only use
// in the variable info map. Mark the stack object as mutable.
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
<< " Replacing frame index " << OldIndex << " with " << FixedIndex
<< '\n';
@@ -8732,14 +8970,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
"LowerFormalArguments didn't return a valid chain!");
assert(InVals.size() == Ins.size() &&
"LowerFormalArguments didn't emit the correct number of values!");
- DEBUG({
- for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
- assert(InVals[i].getNode() &&
- "LowerFormalArguments emitted a null value!");
- assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
- "LowerFormalArguments emitted a value with the wrong type!");
- }
- });
+ LLVM_DEBUG({
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ assert(InVals[i].getNode() &&
+ "LowerFormalArguments emitted a null value!");
+ assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
+ "LowerFormalArguments emitted a value with the wrong type!");
+ }
+ });
// Update the DAG with the new chain value resulting from argument lowering.
DAG.setRoot(NewRoot);
@@ -8940,17 +9178,17 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// At this point we know that there is a 1-1 correspondence between LLVM PHI
// nodes and Machine PHI nodes, but the incoming operands have not been
// emitted yet.
- for (BasicBlock::const_iterator I = SuccBB->begin();
- const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ for (const PHINode &PN : SuccBB->phis()) {
// Ignore dead phi's.
- if (PN->use_empty()) continue;
+ if (PN.use_empty())
+ continue;
// Skip empty types
- if (PN->getType()->isEmptyTy())
+ if (PN.getType()->isEmptyTy())
continue;
unsigned Reg;
- const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+ const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
unsigned &RegOut = ConstantsOut[C];
@@ -8977,7 +9215,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// the input for this MBB.
SmallVector<EVT, 4> ValueVTs;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- ComputeValueVTs(TLI, DAG.getDataLayout(), PN->getType(), ValueVTs);
+ ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs);
for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
EVT VT = ValueVTs[vti];
unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
@@ -9351,7 +9589,7 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
}
BitTestInfo BTI;
- std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
+ llvm::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
// Sort by probability first, number of bits second, bit mask third.
if (a.ExtraProb != b.ExtraProb)
return a.ExtraProb > b.ExtraProb;
@@ -9550,15 +9788,15 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
// checked first. However, two clusters can have the same probability in
// which case their relative ordering is non-deterministic. So we use Low
// as a tie-breaker as clusters are guaranteed to never overlap.
- std::sort(W.FirstCluster, W.LastCluster + 1,
- [](const CaseCluster &a, const CaseCluster &b) {
+ llvm::sort(W.FirstCluster, W.LastCluster + 1,
+ [](const CaseCluster &a, const CaseCluster &b) {
return a.Prob != b.Prob ?
a.Prob > b.Prob :
a.Low->getValue().slt(b.Low->getValue());
});
// Rearrange the case blocks so that the last one falls through if possible
- // without without changing the order of probabilities.
+ // without changing the order of probabilities.
for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
--I;
if (I->Prob > W.LastCluster->Prob)
@@ -9883,8 +10121,8 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
if (!SwitchPeeled)
return SwitchMBB;
- DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: " << TopCaseProb
- << "\n");
+ LLVM_DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: "
+ << TopCaseProb << "\n");
// Record the MBB for the peeled switch statement.
MachineFunction::iterator BBI(SwitchMBB);
@@ -9901,10 +10139,11 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
Clusters.erase(PeeledCaseIt);
for (CaseCluster &CC : Clusters) {
- DEBUG(dbgs() << "Scale the probablity for one cluster, before scaling: "
- << CC.Prob << "\n");
+ LLVM_DEBUG(
+        dbgs() << "Scale the probability for one cluster, before scaling: "
+ << CC.Prob << "\n");
CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb);
- DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
+ LLVM_DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
}
PeeledCaseProb = TopCaseProb;
return PeeledSwitchMBB;
@@ -9983,11 +10222,13 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
findJumpTables(Clusters, &SI, DefaultMBB);
findBitTestClusters(Clusters, &SI);
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Case clusters: ";
for (const CaseCluster &C : Clusters) {
- if (C.Kind == CC_JumpTable) dbgs() << "JT:";
- if (C.Kind == CC_BitTests) dbgs() << "BT:";
+ if (C.Kind == CC_JumpTable)
+ dbgs() << "JT:";
+ if (C.Kind == CC_BitTests)
+ dbgs() << "BT:";
C.Low->getValue().print(dbgs(), true);
if (C.Low != C.High) {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 9e7c2bc6821b..e421984b8af2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -21,7 +21,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -33,6 +32,7 @@
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -116,9 +116,12 @@ class SelectionDAGBuilder {
unsigned getSDNodeOrder() { return SDNodeOrder; }
};
+ /// DanglingDebugInfoVector - Helper type for DanglingDebugInfoMap.
+ typedef std::vector<DanglingDebugInfo> DanglingDebugInfoVector;
+
/// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not
/// yet seen the referent. We defer handling these until we do see it.
- DenseMap<const Value*, DanglingDebugInfo> DanglingDebugInfoMap;
+ DenseMap<const Value*, DanglingDebugInfoVector> DanglingDebugInfoMap;
public:
/// PendingLoads - Loads are not emitted to the program immediately. We bunch
@@ -671,6 +674,12 @@ public:
/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
SDValue getCopyFromRegs(const Value *V, Type *Ty);
+ /// If we have dangling debug info that describes \p Variable, or an
+  /// overlapping part of the variable considering the \p Expr, then this
+  /// method will drop that debug info as it isn't valid any longer.
+ void dropDanglingDebugInfo(const DILocalVariable *Variable,
+ const DIExpression *Expr);
+
// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
void resolveDanglingDebugInfo(const Value *V, SDValue Val);
@@ -678,6 +687,13 @@ public:
SDValue getValue(const Value *V);
bool findValue(const Value *V) const;
+ /// Return the SDNode for the specified IR value if it exists.
+ SDNode *getNodeForIRValue(const Value *V) {
+ if (NodeMap.find(V) == NodeMap.end())
+ return nullptr;
+ return NodeMap[V].getNode();
+ }
+
SDValue getNonRegisterValue(const Value *V);
SDValue getValueImpl(const Value *V);
@@ -696,13 +712,13 @@ public:
void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
- Instruction::BinaryOps Opc, BranchProbability TW,
- BranchProbability FW, bool InvertCond);
+ Instruction::BinaryOps Opc, BranchProbability TProb,
+ BranchProbability FProb, bool InvertCond);
void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
- BranchProbability TW, BranchProbability FW,
+ BranchProbability TProb, BranchProbability FProb,
bool InvertCond);
bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
@@ -774,11 +790,11 @@ public:
};
/// Lower \p SLI into a STATEPOINT instruction.
- SDValue LowerAsSTATEPOINT(StatepointLoweringInfo &SLI);
+ SDValue LowerAsSTATEPOINT(StatepointLoweringInfo &SI);
// This function is responsible for the whole statepoint lowering process.
// It uniformly handles invoke and call statepoints.
- void LowerStatepoint(ImmutableStatepoint Statepoint,
+ void LowerStatepoint(ImmutableStatepoint ISP,
const BasicBlock *EHPadBB = nullptr);
void LowerCallSiteWithDeoptBundle(ImmutableCallSite CS, SDValue Callee,
@@ -838,7 +854,7 @@ private:
void visitInvoke(const InvokeInst &I);
void visitResume(const ResumeInst &I);
- void visitBinary(const User &I, unsigned OpCode);
+ void visitBinary(const User &I, unsigned Opcode);
void visitShift(const User &I, unsigned Opcode);
void visitAdd(const User &I) { visitBinary(I, ISD::ADD); }
void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); }
@@ -881,7 +897,7 @@ private:
void visitExtractValue(const User &I);
void visitInsertValue(const User &I);
- void visitLandingPad(const LandingPadInst &I);
+ void visitLandingPad(const LandingPadInst &LP);
void visitGetElementPtr(const User &I);
void visitSelect(const User &I);
@@ -926,7 +942,7 @@ private:
const BasicBlock *EHPadBB = nullptr);
// These two are implemented in StatepointLowering.cpp
- void visitGCRelocate(const GCRelocateInst &I);
+ void visitGCRelocate(const GCRelocateInst &Relocate);
void visitGCResult(const GCResultInst &I);
void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
@@ -1036,9 +1052,17 @@ struct RegsForValue {
/// Add this value to the specified inlineasm node operand list. This adds the
/// code marker, matching input operand index (if applicable), and includes
/// the number of values added into it.
- void AddInlineAsmOperands(unsigned Kind, bool HasMatching,
+ void AddInlineAsmOperands(unsigned Code, bool HasMatching,
unsigned MatchingIdx, const SDLoc &dl,
SelectionDAG &DAG, std::vector<SDValue> &Ops) const;
+
+ /// Check if the total RegCount is greater than one.
+ bool occupiesMultipleRegs() const {
+ return std::accumulate(RegCount.begin(), RegCount.end(), 0) > 1;
+ }
+
+ /// Return a list of registers and their sizes.
+ SmallVector<std::pair<unsigned, unsigned>, 4> getRegsAndSizes() const;
};
} // end namespace llvm
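
The occupiesMultipleRegs helper added above simply sums the per-value register counts. A minimal standalone sketch of the same check (a hypothetical free function over a plain vector, not the RegsForValue member):

    // Hypothetical standalone version of the occupiesMultipleRegs check:
    // sum the per-value register counts and ask whether more than one
    // register is needed in total.
    #include <numeric>
    #include <vector>

    bool occupiesMultipleRegs(const std::vector<unsigned> &RegCount) {
      return std::accumulate(RegCount.begin(), RegCount.end(), 0u) > 1;
    }

    // occupiesMultipleRegs({1}) == false
    // occupiesMultipleRegs({2}) == true, occupiesMultipleRegs({1, 1}) == true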
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index dd30dc16378c..fa341e8b5fa5 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -20,7 +20,6 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -28,18 +27,21 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -85,6 +87,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
+ case ISD::ATOMIC_LOAD_CLR: return "AtomicLoadClr";
case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
@@ -176,20 +179,30 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FMAXNAN: return "fmaxnan";
case ISD::FNEG: return "fneg";
case ISD::FSQRT: return "fsqrt";
+ case ISD::STRICT_FSQRT: return "strict_fsqrt";
case ISD::FSIN: return "fsin";
+ case ISD::STRICT_FSIN: return "strict_fsin";
case ISD::FCOS: return "fcos";
+ case ISD::STRICT_FCOS: return "strict_fcos";
case ISD::FSINCOS: return "fsincos";
case ISD::FTRUNC: return "ftrunc";
case ISD::FFLOOR: return "ffloor";
case ISD::FCEIL: return "fceil";
case ISD::FRINT: return "frint";
+ case ISD::STRICT_FRINT: return "strict_frint";
case ISD::FNEARBYINT: return "fnearbyint";
+ case ISD::STRICT_FNEARBYINT: return "strict_fnearbyint";
case ISD::FROUND: return "fround";
case ISD::FEXP: return "fexp";
+ case ISD::STRICT_FEXP: return "strict_fexp";
case ISD::FEXP2: return "fexp2";
+ case ISD::STRICT_FEXP2: return "strict_fexp2";
case ISD::FLOG: return "flog";
+ case ISD::STRICT_FLOG: return "strict_flog";
case ISD::FLOG2: return "flog2";
+ case ISD::STRICT_FLOG2: return "strict_flog2";
case ISD::FLOG10: return "flog10";
+ case ISD::STRICT_FLOG10: return "strict_flog10";
// Binary operators
case ISD::ADD: return "add";
@@ -214,24 +227,31 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ROTL: return "rotl";
case ISD::ROTR: return "rotr";
case ISD::FADD: return "fadd";
+ case ISD::STRICT_FADD: return "strict_fadd";
case ISD::FSUB: return "fsub";
+ case ISD::STRICT_FSUB: return "strict_fsub";
case ISD::FMUL: return "fmul";
+ case ISD::STRICT_FMUL: return "strict_fmul";
case ISD::FDIV: return "fdiv";
+ case ISD::STRICT_FDIV: return "strict_fdiv";
case ISD::FMA: return "fma";
+ case ISD::STRICT_FMA: return "strict_fma";
case ISD::FMAD: return "fmad";
case ISD::FREM: return "frem";
+ case ISD::STRICT_FREM: return "strict_frem";
case ISD::FCOPYSIGN: return "fcopysign";
case ISD::FGETSIGN: return "fgetsign";
case ISD::FCANONICALIZE: return "fcanonicalize";
case ISD::FPOW: return "fpow";
+ case ISD::STRICT_FPOW: return "strict_fpow";
case ISD::SMIN: return "smin";
case ISD::SMAX: return "smax";
case ISD::UMIN: return "umin";
case ISD::UMAX: return "umax";
case ISD::FPOWI: return "fpowi";
+ case ISD::STRICT_FPOWI: return "strict_fpowi";
case ISD::SETCC: return "setcc";
- case ISD::SETCCE: return "setcce";
case ISD::SETCCCARRY: return "setcccarry";
case ISD::SELECT: return "select";
case ISD::VSELECT: return "vselect";
@@ -366,7 +386,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SETFALSE2: return "setfalse2";
}
case ISD::VECREDUCE_FADD: return "vecreduce_fadd";
+ case ISD::VECREDUCE_STRICT_FADD: return "vecreduce_strict_fadd";
case ISD::VECREDUCE_FMUL: return "vecreduce_fmul";
+ case ISD::VECREDUCE_STRICT_FMUL: return "vecreduce_strict_fmul";
case ISD::VECREDUCE_ADD: return "vecreduce_add";
case ISD::VECREDUCE_MUL: return "vecreduce_mul";
case ISD::VECREDUCE_AND: return "vecreduce_and";
@@ -401,6 +423,32 @@ static Printable PrintNodeId(const SDNode &Node) {
});
}
+// Print the MMO with more information from the SelectionDAG.
+static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO,
+ const MachineFunction *MF, const Module *M,
+ const MachineFrameInfo *MFI,
+ const TargetInstrInfo *TII, LLVMContext &Ctx) {
+ ModuleSlotTracker MST(M);
+ if (MF)
+ MST.incorporateFunction(MF->getFunction());
+ SmallVector<StringRef, 0> SSNs;
+ MMO.print(OS, MST, SSNs, Ctx, MFI, TII);
+}
+
+static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO,
+ const SelectionDAG *G) {
+ if (G) {
+ const MachineFunction *MF = &G->getMachineFunction();
+ return printMemOperand(OS, MMO, MF, MF->getFunction().getParent(),
+ &MF->getFrameInfo(), G->getSubtarget().getInstrInfo(),
+ *G->getContext());
+ } else {
+ LLVMContext Ctx;
+ return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr,
+ /*MFI=*/nullptr, /*TII=*/nullptr, Ctx);
+ }
+}
+
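
The second printMemOperand overload above falls back to a throwaway LLVMContext when no SelectionDAG is available. A generic sketch of this fallback-overload pattern (hypothetical print/Context names, not the LLVM API):

    // Sketch of the fallback-overload pattern: the rich overload takes the
    // full context, the thin overload synthesizes defaults and forwards.
    #include <iostream>
    #include <string>

    struct Context { std::string Name = "<anonymous>"; };

    void print(std::ostream &OS, int Value, const Context &Ctx) {
      OS << Ctx.Name << ": " << Value << '\n'; // full-context version
    }

    void print(std::ostream &OS, int Value) {
      Context Default;           // synthesize a throwaway context
      print(OS, Value, Default); // forward to the rich overload
    }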
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); }
@@ -430,9 +478,6 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (getFlags().hasExact())
OS << " exact";
- if (getFlags().hasUnsafeAlgebra())
- OS << " unsafe";
-
if (getFlags().hasNoNaNs())
OS << " nnan";
@@ -448,6 +493,12 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (getFlags().hasAllowContract())
OS << " contract";
+ if (getFlags().hasApproximateFuncs())
+ OS << " afn";
+
+ if (getFlags().hasAllowReassociation())
+ OS << " reassoc";
+
if (getFlags().hasVectorReduction())
OS << " vector-reduction";
@@ -457,7 +508,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << "Mem:";
for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
e = MN->memoperands_end(); i != e; ++i) {
- OS << **i;
+ printMemOperand(OS, **i, G);
if (std::next(i) != e)
OS << " ";
}
@@ -549,7 +600,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << ":" << N->getVT().getEVTString();
}
else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
- OS << "<" << *LD->getMemOperand();
+ OS << "<";
+
+ printMemOperand(OS, *LD->getMemOperand(), G);
bool doExt = true;
switch (LD->getExtensionType()) {
@@ -567,7 +620,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << ">";
} else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
- OS << "<" << *ST->getMemOperand();
+ OS << "<";
+ printMemOperand(OS, *ST->getMemOperand(), G);
if (ST->isTruncatingStore())
OS << ", trunc to " << ST->getMemoryVT().getEVTString();
@@ -578,7 +632,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << ">";
} else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
- OS << "<" << *M->getMemOperand() << ">";
+ OS << "<";
+ printMemOperand(OS, *M->getMemOperand(), G);
+ OS << ">";
} else if (const BlockAddressSDNode *BA =
dyn_cast<BlockAddressSDNode>(this)) {
int64_t offset = BA->getOffset();
@@ -608,6 +664,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (getNodeId() != -1)
OS << " [ID=" << getNodeId() << ']';
+ if (!(isa<ConstantSDNode>(this) || (isa<ConstantFPSDNode>(this))))
+ OS << "# D:" << isDivergent();
if (!G)
return;
@@ -779,4 +837,8 @@ void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
if (i) OS << ", "; else OS << " ";
printOperand(OS, G, getOperand(i));
}
+ if (DebugLoc DL = getDebugLoc()) {
+ OS << ", ";
+ DL.print(OS);
+ }
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index d13ccc263718..f7bd8847bee3 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -29,6 +29,7 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -43,7 +44,6 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -82,6 +82,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -196,7 +197,7 @@ defaultListDAGScheduler("default", "Best scheduler for the target",
namespace llvm {
//===--------------------------------------------------------------------===//
- /// \brief This class is used by SelectionDAGISel to temporarily override
+ /// This class is used by SelectionDAGISel to temporarily override
/// the optimization level on a per-function basis.
class OptLevelChanger {
SelectionDAGISel &IS;
@@ -211,26 +212,27 @@ namespace llvm {
return;
IS.OptLevel = NewOptLevel;
IS.TM.setOptLevel(NewOptLevel);
- DEBUG(dbgs() << "\nChanging optimization level for Function "
- << IS.MF->getFunction().getName() << "\n");
- DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel
- << " ; After: -O" << NewOptLevel << "\n");
+ LLVM_DEBUG(dbgs() << "\nChanging optimization level for Function "
+ << IS.MF->getFunction().getName() << "\n");
+ LLVM_DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O"
+ << NewOptLevel << "\n");
SavedFastISel = IS.TM.Options.EnableFastISel;
if (NewOptLevel == CodeGenOpt::None) {
IS.TM.setFastISel(IS.TM.getO0WantsFastISel());
- DEBUG(dbgs() << "\tFastISel is "
- << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled")
- << "\n");
+ LLVM_DEBUG(
+ dbgs() << "\tFastISel is "
+ << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled")
+ << "\n");
}
}
~OptLevelChanger() {
if (IS.OptLevel == SavedOptLevel)
return;
- DEBUG(dbgs() << "\nRestoring optimization level for Function "
- << IS.MF->getFunction().getName() << "\n");
- DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel
- << " ; After: -O" << SavedOptLevel << "\n");
+ LLVM_DEBUG(dbgs() << "\nRestoring optimization level for Function "
+ << IS.MF->getFunction().getName() << "\n");
+ LLVM_DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel << " ; After: -O"
+ << SavedOptLevel << "\n");
IS.OptLevel = SavedOptLevel;
IS.TM.setOptLevel(SavedOptLevel);
IS.TM.setFastISel(SavedFastISel);
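
OptLevelChanger is an RAII guard: the constructor overrides the per-function optimization level and the destructor restores it when the object goes out of scope. A minimal sketch of the shape (hypothetical ScopedOptLevel type, plain ints instead of CodeGenOpt levels):

    // RAII sketch: save the old level, override it, restore on destruction.
    struct ScopedOptLevel {
      int &Level;
      int Saved;
      ScopedOptLevel(int &L, int New) : Level(L), Saved(L) { Level = New; }
      ~ScopedOptLevel() { Level = Saved; }
    };

    void selectFunction(int &OptLevel) {
      ScopedOptLevel Guard(OptLevel, 0); // drop to -O0 for this function
      // ... run selection at -O0 ...
    }                                    // old level restored here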
@@ -326,9 +328,9 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<GCModuleInfo>();
AU.addRequired<StackProtector>();
- AU.addPreserved<StackProtector>();
AU.addPreserved<GCModuleInfo>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
if (UseMBPI && OptLevel != CodeGenOpt::None)
AU.addRequired<BranchProbabilityInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -410,11 +412,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
- DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
+ LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
- CurDAG->init(*MF, *ORE, this);
+ CurDAG->init(*MF, *ORE, this, LibInfo,
+ getAnalysisIfAvailable<DivergenceAnalysis>());
FuncInfo->set(Fn, *MF, CurDAG);
// Now get the optional analyzes if we want to.
@@ -513,8 +516,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// FIXME: VR def may not be in entry block.
Def->getParent()->insert(std::next(InsertPos), MI);
} else
- DEBUG(dbgs() << "Dropping debug info for dead vreg"
- << TargetRegisterInfo::virtReg2Index(Reg) << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for dead vreg"
+ << TargetRegisterInfo::virtReg2Index(Reg) << "\n");
}
// If Reg is live-in then update debug info to track its copy in a vreg.
@@ -621,8 +624,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// at this point.
FuncInfo->clear();
- DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n");
- DEBUG(MF->print(dbgs()));
+ LLVM_DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n");
+ LLVM_DEBUG(MF->print(dbgs()));
return true;
}
@@ -711,6 +714,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
int BlockNumber = -1;
(void)BlockNumber;
bool MatchFilterBB = false; (void)MatchFilterBB;
+ TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*FuncInfo->Fn);
// Pre-type legalization allow creation of any node types.
CurDAG->NewNodesMustHaveLegalTypes = false;
@@ -718,7 +723,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
MatchFilterBB = (FilterDAGBasicBlockName.empty() ||
FilterDAGBasicBlockName ==
- FuncInfo->MBB->getBasicBlock()->getName().str());
+ FuncInfo->MBB->getBasicBlock()->getName());
#endif
#ifdef NDEBUG
if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
@@ -730,9 +735,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
BlockName =
(MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str();
}
- DEBUG(dbgs() << "Initial selection DAG: " << printMBBReference(*FuncInfo->MBB)
- << " '" << BlockName << "'\n";
- CurDAG->dump());
+ LLVM_DEBUG(dbgs() << "Initial selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
if (ViewDAGCombine1 && MatchFilterBB)
CurDAG->viewGraph("dag-combine1 input for " + BlockName);
@@ -744,10 +750,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized lowered selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+
+ LLVM_DEBUG(dbgs() << "Optimized lowered selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
// Second step, hack on the DAG until it only uses operations and types that
// the target supports.
@@ -761,10 +770,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
Changed = CurDAG->LegalizeTypes();
}
- DEBUG(dbgs() << "Type-legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+
+ LLVM_DEBUG(dbgs() << "Type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
// Only allow creation of legal node types.
CurDAG->NewNodesMustHaveLegalTypes = true;
@@ -780,10 +792,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized type-legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+
+ LLVM_DEBUG(dbgs() << "Optimized type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
}
{
@@ -793,10 +808,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
}
if (Changed) {
- DEBUG(dbgs() << "Vector-legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ LLVM_DEBUG(dbgs() << "Vector-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
{
NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName,
@@ -804,10 +819,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->LegalizeTypes();
}
- DEBUG(dbgs() << "Vector/type-legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ LLVM_DEBUG(dbgs() << "Vector/type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
if (ViewDAGCombineLT && MatchFilterBB)
CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
@@ -819,10 +834,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized vector-legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ LLVM_DEBUG(dbgs() << "Optimized vector-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
+
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
}
if (ViewLegalizeDAGs && MatchFilterBB)
@@ -834,10 +852,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Legalize();
}
- DEBUG(dbgs() << "Legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+
+ LLVM_DEBUG(dbgs() << "Legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
if (ViewDAGCombine2 && MatchFilterBB)
CurDAG->viewGraph("dag-combine2 input for " + BlockName);
@@ -849,10 +870,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized legalized selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+
+ LLVM_DEBUG(dbgs() << "Optimized legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
if (OptLevel != CodeGenOpt::None)
ComputeLiveOutVRegInfo();
@@ -868,10 +892,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
DoInstructionSelection();
}
- DEBUG(dbgs() << "Selected selection DAG: "
- << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
- << "'\n";
- CurDAG->dump());
+ LLVM_DEBUG(dbgs() << "Selected selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
if (ViewSchedDAGs && MatchFilterBB)
CurDAG->viewGraph("scheduler input for " + BlockName);
@@ -937,10 +961,62 @@ public:
} // end anonymous namespace
+// This function is used to enforce the topological node id property
+// leveraged during instruction selection. Before selection, all nodes are
+// given a non-negative id such that every node has a larger id than its
+// operands. As this holds transitively, we can prune the check that a node N
+// is a predecessor of another node M by not recursing through M's operands
+// when N's id is larger than M's id. This significantly improves the
+// performance of various legality checks (e.g. IsLegalToFold /
+// UpdateChains).
+
+// However, when we fuse multiple nodes into a single node during selection,
+// we may induce a predecessor relationship between the inputs and outputs of
+// the distinct nodes being merged, violating the topological property.
+// Should a fused node have a successor which has yet to be selected, our
+// legality checks would be incorrect. To avoid this we mark all unselected
+// successor nodes (i.e. id != -1) as invalid for pruning by bit-negating
+// (x => -(x+1)) their ids, and we modify the pruning check to ignore
+// negative ids on M. Bit-negation makes explicit that node id -1 can only be
+// reached by selected nodes, and since the conversion is reversible,
+// topological pruning can still be leveraged when looking for unselected
+// nodes. This method is called internally by all ISel replacement calls.
+void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) {
+ SmallVector<SDNode *, 4> Nodes;
+ Nodes.push_back(Node);
+
+ while (!Nodes.empty()) {
+ SDNode *N = Nodes.pop_back_val();
+ for (auto *U : N->uses()) {
+ auto UId = U->getNodeId();
+ if (UId > 0) {
+ InvalidateNodeId(U);
+ Nodes.push_back(U);
+ }
+ }
+ }
+}
+
+// InvalidateNodeId - As discussed in EnforceNodeIdInvariant, mark a node
+// with the equivalent node id that is invalid for topological pruning.
+void SelectionDAGISel::InvalidateNodeId(SDNode *N) {
+ int InvalidId = -(N->getNodeId() + 1);
+ N->setNodeId(InvalidId);
+}
+
+// getUninvalidatedNodeId - get original uninvalidated node id.
+int SelectionDAGISel::getUninvalidatedNodeId(SDNode *N) {
+ int Id = N->getNodeId();
+ if (Id < -1)
+ return -(Id + 1);
+ return Id;
+}
+
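
A standalone sketch of the id encoding above (plain ints, not actual SDNode ids) shows the round trip and why -1 stays reserved for selected nodes; note that EnforceNodeIdInvariant only invalidates ids greater than zero:

    #include <cassert>

    int invalidate(int Id) { return -(Id + 1); } // x => -(x+1)
    int uninvalidate(int Id) { return Id < -1 ? -(Id + 1) : Id; }

    int main() {
      assert(invalidate(5) == -6);    // positive ids map below -1
      assert(uninvalidate(-6) == 5);  // the conversion is reversible
      assert(uninvalidate(-1) == -1); // -1 ("selected") is never produced,
                                      // since only ids > 0 are invalidated
      assert(uninvalidate(3) == 3);   // valid ids pass through unchanged
      return 0;
    }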
void SelectionDAGISel::DoInstructionSelection() {
- DEBUG(dbgs() << "===== Instruction selection begins: "
- << printMBBReference(*FuncInfo->MBB) << " '"
- << FuncInfo->MBB->getName() << "'\n");
+ LLVM_DEBUG(dbgs() << "===== Instruction selection begins: "
+ << printMBBReference(*FuncInfo->MBB) << " '"
+ << FuncInfo->MBB->getName() << "'\n");
PreprocessISelDAG();
@@ -972,6 +1048,33 @@ void SelectionDAGISel::DoInstructionSelection() {
if (Node->use_empty())
continue;
+#ifndef NDEBUG
+ SmallVector<SDNode *, 4> Nodes;
+ Nodes.push_back(Node);
+
+ while (!Nodes.empty()) {
+ auto N = Nodes.pop_back_val();
+ if (N->getOpcode() == ISD::TokenFactor || N->getNodeId() < 0)
+ continue;
+ for (const SDValue &Op : N->op_values()) {
+ if (Op->getOpcode() == ISD::TokenFactor)
+ Nodes.push_back(Op.getNode());
+ else {
+ // We rely on the topological ordering of node ids for checking for
+ // cycles when fusing nodes during selection. All unselected successor
+ // nodes of an already selected node should have a negative id. This
+ // assertion will catch such cases. If this assertion triggers, it is
+ // likely that you are using DAG-level Value/Node replacement functions
+ // (rather than the equivalent ISEL replacements) in backend-specific
+ // selection code. See the comment in EnforceNodeIdInvariant for more
+ // details.
+ assert(Op->getNodeId() != -1 &&
+ "Node has already selected predecessor node");
+ }
+ }
+ }
+#endif
+
// When we are using non-default rounding modes or FP exception behavior
// FP operations are represented by StrictFP pseudo-operations. They
// need to be simplified here so that the target-specific instruction
@@ -985,13 +1088,16 @@ void SelectionDAGISel::DoInstructionSelection() {
if (Node->isStrictFPOpcode())
Node = CurDAG->mutateStrictFPToFP(Node);
+ LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: ";
+ Node->dump(CurDAG));
+
Select(Node);
}
CurDAG->setRoot(Dummy.getValue());
}
- DEBUG(dbgs() << "===== Instruction selection ends:\n");
+ LLVM_DEBUG(dbgs() << "\n===== Instruction selection ends:\n");
PostprocessISelDAG();
}
@@ -1264,7 +1370,7 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
}
auto DLoc = isa<Instruction>(SwiftErrorVal)
- ? dyn_cast<Instruction>(SwiftErrorVal)->getDebugLoc()
+ ? cast<Instruction>(SwiftErrorVal)->getDebugLoc()
: DebugLoc();
const auto *TII = FuncInfo->MF->getSubtarget().getInstrInfo();
@@ -1380,8 +1486,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastISelFailed = false;
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = nullptr;
- if (TM.Options.EnableFastISel)
+ if (TM.Options.EnableFastISel) {
+ LLVM_DEBUG(dbgs() << "Enabling fast-isel\n");
FastIS = TLI->createFastISel(*FuncInfo, LibInfo);
+ }
setupSwiftErrorVals(Fn, TLI, FuncInfo);
@@ -1396,6 +1504,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FuncInfo->MBB = FuncInfo->MBBMap[&Fn.getEntryBlock()];
FuncInfo->InsertPt = FuncInfo->MBB->begin();
+ CurDAG->setFunctionLoweringInfo(FuncInfo);
+
if (!FastIS) {
LowerArguments(Fn);
} else {
@@ -1433,6 +1543,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
processDbgDeclares(FuncInfo);
// Iterate over all basic blocks in the function.
+ StackProtector &SP = getAnalysis<StackProtector>();
for (const BasicBlock *LLVMBB : RPOT) {
if (OptLevel != CodeGenOpt::None) {
bool AllPredsVisited = true;
@@ -1445,13 +1556,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
if (AllPredsVisited) {
- for (BasicBlock::const_iterator I = LLVMBB->begin();
- const PHINode *PN = dyn_cast<PHINode>(I); ++I)
- FuncInfo->ComputePHILiveOutRegInfo(PN);
+ for (const PHINode &PN : LLVMBB->phis())
+ FuncInfo->ComputePHILiveOutRegInfo(&PN);
} else {
- for (BasicBlock::const_iterator I = LLVMBB->begin();
- const PHINode *PN = dyn_cast<PHINode>(I); ++I)
- FuncInfo->InvalidatePHILiveOutRegInfo(PN);
+ for (const PHINode &PN : LLVMBB->phis())
+ FuncInfo->InvalidatePHILiveOutRegInfo(&PN);
}
FuncInfo->VisitedBBs.insert(LLVMBB);
@@ -1604,7 +1713,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS->recomputeInsertPt();
}
- if (getAnalysis<StackProtector>().shouldEmitSDCheck(*LLVMBB)) {
+ if (SP.shouldEmitSDCheck(*LLVMBB)) {
bool FunctionBasedInstrumentation =
TLI->getSSPStackGuardCheck(*Fn.getParent());
SDB->SPDescriptor.initialize(LLVMBB, FuncInfo->MBBMap[LLVMBB],
@@ -1630,11 +1739,15 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS->removeDeadCode(FuncInfo->InsertPt, FuncInfo->MBB->end());
}
+ if (FastIS)
+ FastIS->finishBasicBlock();
FinishBasicBlock();
FuncInfo->PHINodesToUpdate.clear();
ElidedArgCopyInstrs.clear();
}
+ SP.copyToMachineFrameInfo(MF->getFrameInfo());
+
propagateSwiftErrorVRegs(FuncInfo);
delete FastIS;
@@ -1728,12 +1841,12 @@ FindSplitPointForStackProtector(MachineBasicBlock *BB) {
void
SelectionDAGISel::FinishBasicBlock() {
- DEBUG(dbgs() << "Total amount of phi nodes to update: "
- << FuncInfo->PHINodesToUpdate.size() << "\n";
- for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i)
- dbgs() << "Node " << i << " : ("
- << FuncInfo->PHINodesToUpdate[i].first
- << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
+ LLVM_DEBUG(dbgs() << "Total amount of phi nodes to update: "
+ << FuncInfo->PHINodesToUpdate.size() << "\n";
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e;
+ ++i) dbgs()
+ << "Node " << i << " : (" << FuncInfo->PHINodesToUpdate[i].first
+ << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
// Next, now that we know what the last MBB the LLVM BB expanded is, update
// PHI nodes in successors.
@@ -2012,7 +2125,7 @@ bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
return true;
// If the actual AND mask is allowing unallowed bits, this doesn't match.
- if (ActualMask.intersects(~DesiredMask))
+ if (!ActualMask.isSubsetOf(DesiredMask))
return false;
// Otherwise, the DAG Combiner may have proven that the value coming in is
@@ -2041,7 +2154,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
return true;
// If the actual AND mask is allowing unallowed bits, this doesn't match.
- if (ActualMask.intersects(~DesiredMask))
+ if (!ActualMask.isSubsetOf(DesiredMask))
return false;
// Otherwise, the DAG Combiner may have proven that the value coming in is
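
Both hunks above replace ActualMask.intersects(~DesiredMask) with the equivalent !ActualMask.isSubsetOf(DesiredMask), which avoids materializing the temporary ~DesiredMask. A small check of the identity (a hypothetical standalone test, assuming only the public APInt API):

    // A & ~B != 0 is exactly !A.isSubsetOf(B), for any bit width.
    #include "llvm/ADT/APInt.h"
    #include <cassert>
    using llvm::APInt;

    int main() {
      APInt Actual(8, 0xB2), Desired(8, 0xF0);
      assert(Actual.intersects(~Desired) == !Actual.isSubsetOf(Desired));
      return 0;
    }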
@@ -2134,52 +2247,44 @@ static SDNode *findGlueUse(SDNode *N) {
return nullptr;
}
-/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
-/// This function iteratively traverses up the operand chain, ignoring
-/// certain nodes.
-static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
- SDNode *Root, SmallPtrSetImpl<SDNode*> &Visited,
+/// findNonImmUse - Return true if "Def" is a predecessor of "Root" via a path
+/// beyond "ImmedUse". We may ignore chains as they are checked separately.
+static bool findNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
bool IgnoreChains) {
- // The NodeID's are given uniques ID's where a node ID is guaranteed to be
- // greater than all of its (recursive) operands. If we scan to a point where
- // 'use' is smaller than the node we're scanning for, then we know we will
- // never find it.
- //
- // The Use may be -1 (unassigned) if it is a newly allocated node. This can
- // happen because we scan down to newly selected nodes in the case of glue
- // uses.
- std::vector<SDNode *> WorkList;
- WorkList.push_back(Use);
-
- while (!WorkList.empty()) {
- Use = WorkList.back();
- WorkList.pop_back();
- if (Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1)
- continue;
+ SmallPtrSet<const SDNode *, 16> Visited;
+ SmallVector<const SDNode *, 16> WorkList;
+ // Only check if we have non-immediate uses of Def.
+ if (ImmedUse->isOnlyUserOf(Def))
+ return false;
- // Don't revisit nodes if we already scanned it and didn't fail, we know we
- // won't fail if we scan it again.
- if (!Visited.insert(Use).second)
+ // We don't care about paths to Def that go through ImmedUse, so mark it
+ // visited and add its non-Def operands to the worklist.
+ Visited.insert(ImmedUse);
+ for (const SDValue &Op : ImmedUse->op_values()) {
+ SDNode *N = Op.getNode();
+ // Ignore chain deps (they are validated by
+ // HandleMergeInputChains) and immediate uses
+ if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def)
continue;
+ if (!Visited.insert(N).second)
+ continue;
+ WorkList.push_back(N);
+ }
- for (const SDValue &Op : Use->op_values()) {
- // Ignore chain uses, they are validated by HandleMergeInputChains.
- if (Op.getValueType() == MVT::Other && IgnoreChains)
- continue;
-
+ // Initialize worklist to operands of Root.
+ if (Root != ImmedUse) {
+ for (const SDValue &Op : Root->op_values()) {
SDNode *N = Op.getNode();
- if (N == Def) {
- if (Use == ImmedUse || Use == Root)
- continue; // We are not looking for immediate use.
- assert(N != Root);
- return true;
- }
-
- // Traverse up the operand chain.
+ // Ignore chains (they are validated by HandleMergeInputChains)
+ if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def)
+ continue;
+ if (!Visited.insert(N).second)
+ continue;
WorkList.push_back(N);
}
}
- return false;
+
+ return SDNode::hasPredecessorHelper(Def, Visited, WorkList, 0, true);
}
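
The rewritten findNonImmUse delegates the actual reachability search to SDNode::hasPredecessorHelper, which walks operands from the seeded worklist (and additionally prunes via the topological node ids discussed earlier). A simplified sketch of that traversal over a toy node type (hypothetical Node struct, not the SDNode API):

    // Toy worklist search: report whether Def is reachable through the
    // operands of the seeded roots, i.e. whether Def is a predecessor.
    #include <set>
    #include <vector>

    struct Node { std::vector<Node *> Operands; };

    bool isPredecessor(const Node *Def, std::set<const Node *> &Visited,
                       std::vector<const Node *> &Worklist) {
      while (!Worklist.empty()) {
        const Node *N = Worklist.back();
        Worklist.pop_back();
        if (N == Def)
          return true;
        for (const Node *Op : N->Operands)
          if (Visited.insert(Op).second) // skip already-visited operands
            Worklist.push_back(Op);
      }
      return false;
    }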
/// IsProfitableToFold - Returns true if it's profitable to fold the specific
@@ -2199,7 +2304,7 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
  // If Root use can somehow reach N through a path that doesn't contain
  // U then folding N would create a cycle. e.g. In the following
- // diagram, Root can reach N through X. If N is folded into into Root, then
+ // diagram, Root can reach N through X. If N is folded into Root, then
// X is both a predecessor and a successor of U.
//
// [N*] //
@@ -2251,13 +2356,12 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
// If our query node has a glue result with a use, we've walked up it. If
// the user (which has already been selected) has a chain or indirectly uses
- // the chain, our WalkChainUsers predicate will not consider it. Because of
+ // the chain, HandleMergeInputChains will not consider it. Because of
// this, we cannot ignore chains in this predicate.
IgnoreChains = false;
}
- SmallPtrSet<SDNode*, 16> Visited;
- return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
+ return !findNonImmUse(Root, N.getNode(), U, IgnoreChains);
}
void SelectionDAGISel::Select_INLINEASM(SDNode *N) {
@@ -2360,7 +2464,8 @@ void SelectionDAGISel::UpdateChains(
std::replace(ChainNodesMatched.begin(), ChainNodesMatched.end(), N,
static_cast<SDNode *>(nullptr));
});
- CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain);
+ if (ChainNode->getOpcode() != ISD::TokenFactor)
+ ReplaceUses(ChainVal, InputChain);
// If the node became dead and we haven't already seen it, delete it.
if (ChainNode != NodeToMatch && ChainNode->use_empty() &&
@@ -2372,144 +2477,7 @@ void SelectionDAGISel::UpdateChains(
if (!NowDeadNodes.empty())
CurDAG->RemoveDeadNodes(NowDeadNodes);
- DEBUG(dbgs() << "ISEL: Match complete!\n");
-}
-
-enum ChainResult {
- CR_Simple,
- CR_InducesCycle,
- CR_LeadsToInteriorNode
-};
-
-/// WalkChainUsers - Walk down the users of the specified chained node that is
-/// part of the pattern we're matching, looking at all of the users we find.
-/// This determines whether something is an interior node, whether we have a
-/// non-pattern node in between two pattern nodes (which prevent folding because
-/// it would induce a cycle) and whether we have a TokenFactor node sandwiched
-/// between pattern nodes (in which case the TF becomes part of the pattern).
-///
-/// The walk we do here is guaranteed to be small because we quickly get down to
-/// already selected nodes "below" us.
-static ChainResult
-WalkChainUsers(const SDNode *ChainedNode,
- SmallVectorImpl<SDNode *> &ChainedNodesInPattern,
- DenseMap<const SDNode *, ChainResult> &TokenFactorResult,
- SmallVectorImpl<SDNode *> &InteriorChainedNodes) {
- ChainResult Result = CR_Simple;
-
- for (SDNode::use_iterator UI = ChainedNode->use_begin(),
- E = ChainedNode->use_end(); UI != E; ++UI) {
- // Make sure the use is of the chain, not some other value we produce.
- if (UI.getUse().getValueType() != MVT::Other) continue;
-
- SDNode *User = *UI;
-
- if (User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
- continue;
-
- // If we see an already-selected machine node, then we've gone beyond the
- // pattern that we're selecting down into the already selected chunk of the
- // DAG.
- unsigned UserOpcode = User->getOpcode();
- if (User->isMachineOpcode() ||
- UserOpcode == ISD::CopyToReg ||
- UserOpcode == ISD::CopyFromReg ||
- UserOpcode == ISD::INLINEASM ||
- UserOpcode == ISD::EH_LABEL ||
- UserOpcode == ISD::LIFETIME_START ||
- UserOpcode == ISD::LIFETIME_END) {
- // If their node ID got reset to -1 then they've already been selected.
- // Treat them like a MachineOpcode.
- if (User->getNodeId() == -1)
- continue;
- }
-
- // If we have a TokenFactor, we handle it specially.
- if (User->getOpcode() != ISD::TokenFactor) {
- // If the node isn't a token factor and isn't part of our pattern, then it
- // must be a random chained node in between two nodes we're selecting.
- // This happens when we have something like:
- // x = load ptr
- // call
- // y = x+4
- // store y -> ptr
- // Because we structurally match the load/store as a read/modify/write,
- // but the call is chained between them. We cannot fold in this case
- // because it would induce a cycle in the graph.
- if (!std::count(ChainedNodesInPattern.begin(),
- ChainedNodesInPattern.end(), User))
- return CR_InducesCycle;
-
- // Otherwise we found a node that is part of our pattern. For example in:
- // x = load ptr
- // y = x+4
- // store y -> ptr
- // This would happen when we're scanning down from the load and see the
- // store as a user. Record that there is a use of ChainedNode that is
- // part of the pattern and keep scanning uses.
- Result = CR_LeadsToInteriorNode;
- InteriorChainedNodes.push_back(User);
- continue;
- }
-
- // If we found a TokenFactor, there are two cases to consider: first if the
- // TokenFactor is just hanging "below" the pattern we're matching (i.e. no
- // uses of the TF are in our pattern) we just want to ignore it. Second,
- // the TokenFactor can be sandwiched in between two chained nodes, like so:
- // [Load chain]
- // ^
- // |
- // [Load]
- // ^ ^
- // | \ DAG's like cheese
- // / \ do you?
- // / |
- // [TokenFactor] [Op]
- // ^ ^
- // | |
- // \ /
- // \ /
- // [Store]
- //
- // In this case, the TokenFactor becomes part of our match and we rewrite it
- // as a new TokenFactor.
- //
- // To distinguish these two cases, do a recursive walk down the uses.
- auto MemoizeResult = TokenFactorResult.find(User);
- bool Visited = MemoizeResult != TokenFactorResult.end();
- // Recursively walk chain users only if the result is not memoized.
- if (!Visited) {
- auto Res = WalkChainUsers(User, ChainedNodesInPattern, TokenFactorResult,
- InteriorChainedNodes);
- MemoizeResult = TokenFactorResult.insert(std::make_pair(User, Res)).first;
- }
- switch (MemoizeResult->second) {
- case CR_Simple:
- // If the uses of the TokenFactor are just already-selected nodes, ignore
- // it, it is "below" our pattern.
- continue;
- case CR_InducesCycle:
- // If the uses of the TokenFactor lead to nodes that are not part of our
- // pattern that are not selected, folding would turn this into a cycle,
- // bail out now.
- return CR_InducesCycle;
- case CR_LeadsToInteriorNode:
- break; // Otherwise, keep processing.
- }
-
- // Okay, we know we're in the interesting interior case. The TokenFactor
- // is now going to be considered part of the pattern so that we rewrite its
- // uses (it may have uses that are not part of the pattern) with the
- // ultimate chain result of the generated code. We will also add its chain
- // inputs as inputs to the ultimate TokenFactor we create.
- Result = CR_LeadsToInteriorNode;
- if (!Visited) {
- ChainedNodesInPattern.push_back(User);
- InteriorChainedNodes.push_back(User);
- }
- }
-
- return Result;
+ LLVM_DEBUG(dbgs() << "ISEL: Match complete!\n");
}
/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains
@@ -2521,47 +2489,56 @@ WalkChainUsers(const SDNode *ChainedNode,
static SDValue
HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
SelectionDAG *CurDAG) {
- // Used for memoization. Without it WalkChainUsers could take exponential
- // time to run.
- DenseMap<const SDNode *, ChainResult> TokenFactorResult;
- // Walk all of the chained nodes we've matched, recursively scanning down the
- // users of the chain result. This adds any TokenFactor nodes that are caught
- // in between chained nodes to the chained and interior nodes list.
- SmallVector<SDNode*, 3> InteriorChainedNodes;
- for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
- if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched,
- TokenFactorResult,
- InteriorChainedNodes) == CR_InducesCycle)
- return SDValue(); // Would induce a cycle.
- }
- // Okay, we have walked all the matched nodes and collected TokenFactor nodes
- // that we are interested in. Form our input TokenFactor node.
+ SmallPtrSet<const SDNode *, 16> Visited;
+ SmallVector<const SDNode *, 8> Worklist;
SmallVector<SDValue, 3> InputChains;
- for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
- // Add the input chain of this node to the InputChains list (which will be
- // the operands of the generated TokenFactor) if it's not an interior node.
- SDNode *N = ChainNodesMatched[i];
- if (N->getOpcode() != ISD::TokenFactor) {
- if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))
- continue;
+ unsigned int Max = 8192;
- // Otherwise, add the input chain.
- SDValue InChain = ChainNodesMatched[i]->getOperand(0);
- assert(InChain.getValueType() == MVT::Other && "Not a chain");
- InputChains.push_back(InChain);
- continue;
- }
+ // Quick exit on trivial merge.
+ if (ChainNodesMatched.size() == 1)
+ return ChainNodesMatched[0]->getOperand(0);
- // If we have a token factor, we want to add all inputs of the token factor
- // that are not part of the pattern we're matching.
- for (const SDValue &Op : N->op_values()) {
- if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(),
- Op.getNode()))
- InputChains.push_back(Op);
- }
+ // Add chains that aren't already added (internal). Peek through
+ // token factors.
+ std::function<void(const SDValue)> AddChains = [&](const SDValue V) {
+ if (V.getValueType() != MVT::Other)
+ return;
+ if (V->getOpcode() == ISD::EntryToken)
+ return;
+ if (!Visited.insert(V.getNode()).second)
+ return;
+ if (V->getOpcode() == ISD::TokenFactor) {
+ for (const SDValue &Op : V->op_values())
+ AddChains(Op);
+ } else
+ InputChains.push_back(V);
+ };
+
+ for (auto *N : ChainNodesMatched) {
+ Worklist.push_back(N);
+ Visited.insert(N);
}
+ while (!Worklist.empty())
+ AddChains(Worklist.pop_back_val()->getOperand(0));
+
+ // Skip the search if there are no chain dependencies.
+ if (InputChains.size() == 0)
+ return CurDAG->getEntryNode();
+
+ // If one of these chains is a successor of the input chains, we must have
+ // a node that is both a predecessor and a successor of the to-be-merged
+ // nodes. Fail.
+ Visited.clear();
+ for (SDValue V : InputChains)
+ Worklist.push_back(V.getNode());
+
+ for (auto *N : ChainNodesMatched)
+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist, Max, true))
+ return SDValue();
+
+ // Return merged chain.
if (InputChains.size() == 1)
return InputChains[0];
return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]),
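
The AddChains lambda above peeks through TokenFactors so that nested chains are flattened into a single list of unique inputs. A toy sketch of the same recursion (hypothetical Node type with an IsTokenFactor flag, not the SDNode API):

    // Flatten chains: recurse through token factors, record every other
    // unseen chain exactly once.
    #include <set>
    #include <vector>

    struct Node {
      bool IsTokenFactor = false;
      std::vector<Node *> Operands;
    };

    void addChains(Node *V, std::set<Node *> &Visited,
                   std::vector<Node *> &InputChains) {
      if (!Visited.insert(V).second)
        return;                      // already recorded or expanded
      if (V->IsTokenFactor) {
        for (Node *Op : V->Operands) // peek through: take its inputs
          addChains(Op, Visited, InputChains);
      } else {
        InputChains.push_back(V);    // a real chain input
      }
    }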
@@ -2606,8 +2583,8 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
// Move the glue if needed.
if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 &&
(unsigned)OldGlueResultNo != ResNumResults-1)
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo),
- SDValue(Res, ResNumResults-1));
+ ReplaceUses(SDValue(Node, OldGlueResultNo),
+ SDValue(Res, ResNumResults - 1));
if ((EmitNodeInfo & OPFL_GlueOutput) != 0)
--ResNumResults;
@@ -2615,14 +2592,15 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
// Move the chain reference if needed.
if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
(unsigned)OldChainResultNo != ResNumResults-1)
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
- SDValue(Res, ResNumResults-1));
+ ReplaceUses(SDValue(Node, OldChainResultNo),
+ SDValue(Res, ResNumResults - 1));
// Otherwise, no replacement happened because the node already exists. Replace
// Uses of the old node with the new one.
if (Res != Node) {
- CurDAG->ReplaceAllUsesWith(Node, Res);
- CurDAG->RemoveDeadNode(Node);
+ ReplaceNode(Node, Res);
+ } else {
+ EnforceNodeIdInvariant(Res);
}
return Res;
@@ -2861,7 +2839,7 @@ struct MatchScope {
bool HasChainNodesMatched;
};
-/// \brief A DAG update listener to keep the matching state
+/// A DAG update listener to keep the matching state
/// (i.e. RecordedNodes and MatchScope) uptodate if the target is allowed to
/// change the DAG while matching. X86 addressing mode matcher is an example
/// for this.
@@ -2939,8 +2917,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
return;
case ISD::AssertSext:
case ISD::AssertZext:
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0),
- NodeToMatch->getOperand(0));
+ ReplaceUses(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0));
CurDAG->RemoveDeadNode(NodeToMatch);
return;
case ISD::INLINEASM:
@@ -2988,9 +2965,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// update the chain results when the pattern is complete.
SmallVector<SDNode*, 3> ChainNodesMatched;
- DEBUG(dbgs() << "ISEL: Starting pattern match on root node: ";
- NodeToMatch->dump(CurDAG);
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "ISEL: Starting pattern match\n");
// Determine where to start the interpreter. Normally we start at opcode #0,
// but if the state machine starts with an OPC_SwitchOpcode, then we
@@ -3002,7 +2977,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// Already computed the OpcodeOffset table, just index into it.
if (N.getOpcode() < OpcodeOffset.size())
MatcherIndex = OpcodeOffset[N.getOpcode()];
- DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n");
+ LLVM_DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n");
} else if (MatcherTable[0] == OPC_SwitchOpcode) {
// Otherwise, the table isn't computed, but the state machine does start
@@ -3069,9 +3044,10 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
if (!Result)
break;
- DEBUG(dbgs() << " Skipped scope entry (due to false predicate) at "
- << "index " << MatcherIndexOfPredicate
- << ", continuing at " << FailIndex << "\n");
+ LLVM_DEBUG(
+ dbgs() << " Skipped scope entry (due to false predicate) at "
+ << "index " << MatcherIndexOfPredicate << ", continuing at "
+ << FailIndex << "\n");
++NumDAGIselRetries;
// Otherwise, we know that this case of the Scope is guaranteed to fail,
@@ -3120,11 +3096,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
if (auto *MN = dyn_cast<MemSDNode>(N))
MatchedMemRefs.push_back(MN->getMemOperand());
else {
- DEBUG(
- dbgs() << "Expected MemSDNode ";
- N->dump(CurDAG);
- dbgs() << '\n'
- );
+ LLVM_DEBUG(dbgs() << "Expected MemSDNode "; N->dump(CurDAG);
+ dbgs() << '\n');
}
continue;
@@ -3245,8 +3218,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
if (CaseSize == 0) break;
// Otherwise, execute the case we found.
- DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart
- << " to " << MatcherIndex << "\n");
+ LLVM_DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart << " to "
+ << MatcherIndex << "\n");
continue;
}
@@ -3277,8 +3250,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
if (CaseSize == 0) break;
// Otherwise, execute the case we found.
- DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
- << "] from " << SwitchStart << " to " << MatcherIndex<<'\n');
+ LLVM_DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
+ << "] from " << SwitchStart << " to " << MatcherIndex
+ << '\n');
continue;
}
case OPC_CheckChild0Type: case OPC_CheckChild1Type:
@@ -3658,16 +3632,11 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
Res->setMemRefs(MemRefs, MemRefs + NumMemRefs);
}
- DEBUG(
- if (!MatchedMemRefs.empty() && Res->memoperands_empty())
- dbgs() << " Dropping mem operands\n";
- dbgs() << " "
- << (IsMorphNodeTo ? "Morphed" : "Created")
- << " node: ";
- Res->dump(CurDAG);
-
- dbgs() << '\n';
- );
+ LLVM_DEBUG(if (!MatchedMemRefs.empty() && Res->memoperands_empty()) dbgs()
+ << " Dropping mem operands\n";
+ dbgs() << " " << (IsMorphNodeTo ? "Morphed" : "Created")
+ << " node: ";
+ Res->dump(CurDAG););
// If this was a MorphNodeTo then we're completely done!
if (IsMorphNodeTo) {
@@ -3702,7 +3671,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
NodeToMatch->getValueType(i).getSizeInBits() ==
Res.getValueSizeInBits()) &&
"invalid replacement");
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res);
+ ReplaceUses(SDValue(NodeToMatch, i), Res);
}
// Update chain uses.
@@ -3715,8 +3684,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
if (NodeToMatch->getValueType(NodeToMatch->getNumValues() - 1) ==
MVT::Glue &&
InputGlue.getNode())
- CurDAG->ReplaceAllUsesOfValueWith(
- SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1), InputGlue);
+ ReplaceUses(SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1),
+ InputGlue);
assert(NodeToMatch->use_empty() &&
"Didn't replace all uses of the node?");
@@ -3729,7 +3698,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// If the code reached this point, then the match failed. See if there is
// another child to try in the current 'Scope', otherwise pop it until we
// find a case to check.
- DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex << "\n");
+ LLVM_DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex
+ << "\n");
++NumDAGIselRetries;
while (true) {
if (MatchScopes.empty()) {
@@ -3749,7 +3719,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
MatcherIndex = LastScope.FailIndex;
- DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n");
+ LLVM_DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n");
InputChain = LastScope.InputChain;
InputGlue = LastScope.InputGlue;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index be4ab094bf49..3b19bff4743d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -229,7 +229,7 @@ bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet
if (level >= 20) {
if (!printed) {
printed = true;
- DEBUG(dbgs() << "setSubgraphColor hit max level\n");
+ LLVM_DEBUG(dbgs() << "setSubgraphColor hit max level\n");
}
return true;
}
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 3f64b49e3555..5cf06e62b80c 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -28,7 +28,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -43,6 +42,7 @@
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index d76e52d78870..fa867fcec366 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -20,7 +20,6 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -32,6 +31,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>
using namespace llvm;
@@ -96,7 +96,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
return true;
}
-/// \brief Set CallLoweringInfo attribute flags based on a call instruction
+/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(ImmutableCallSite *CS,
unsigned ArgIdx) {
@@ -524,6 +524,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
// Other users may use these bits.
+ EVT VT = Op.getValueType();
if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
if (Depth != 0) {
// If not at the root, Just compute the Known bits to
@@ -537,7 +538,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
} else if (DemandedMask == 0) {
// Not demanding any bits from Op.
if (!Op.isUndef())
- return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
return false;
} else if (Depth == 6) { // Limit search depth.
return false;
@@ -580,7 +581,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownBits LHSKnown;
// Do not increment Depth here; that can cause an infinite loop.
TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth);
- // If the LHS already has zeros where RHSC does, this and is dead.
+ // If the LHS already has zeros where RHSC does, this 'and' is dead.
if ((LHSKnown.Zero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
return TLO.CombineTo(Op, Op0);
@@ -596,8 +597,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
LHSKnown.One == ~RHSC->getAPIntValue()) {
- SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, Op.getValueType(),
- Op0.getOperand(0), Op.getOperand(1));
+ SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0),
+ Op.getOperand(1));
return TLO.CombineTo(Op, Xor);
}
}
@@ -618,7 +619,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
return TLO.CombineTo(Op, Op.getOperand(1));
// If all of the demanded bits in the inputs are known zeros, return zero.
if (NewMask.isSubsetOf(Known.Zero | Known2.Zero))
- return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType()));
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
// If the RHS is a constant, see if we can simplify it.
if (ShrinkDemandedConstant(Op, ~Known2.Zero & NewMask, TLO))
return true;
@@ -680,7 +681,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// (but not both) turn this into an *inclusive* or.
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
if ((NewMask & ~Known.Zero & ~Known2.Zero) == 0)
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT,
Op.getOperand(0),
Op.getOperand(1)));
@@ -696,7 +697,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// NB: it is okay if more bits are known than are requested
if (NewMask.isSubsetOf(Known.Zero|Known.One)) { // all known on one side
if (Known.One == Known2.One) { // set bits are the same on both sides
- EVT VT = Op.getValueType();
SDValue ANDC = TLO.DAG.getConstant(~Known.One & NewMask, dl, VT);
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
Op.getOperand(0), ANDC));
@@ -710,7 +710,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (C && !C->isAllOnesValue()) {
if (NewMask.isSubsetOf(C->getAPIntValue())) {
// We're flipping all demanded bits. Flip the undemanded bits too.
- SDValue New = TLO.DAG.getNOT(dl, Op.getOperand(0), Op.getValueType());
+ SDValue New = TLO.DAG.getNOT(dl, Op.getOperand(0), VT);
return TLO.CombineTo(Op, New);
}
// If we can't turn this into a 'not', try to shrink the constant.
@@ -761,7 +761,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// width as the setcc result, and (3) the result of a setcc conforms to 0 or
// -1, we may be able to bypass the setcc.
if (NewMask.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth &&
- getBooleanContents(Op.getValueType()) ==
+ getBooleanContents(VT) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
// If we're testing X < 0, then this compare isn't needed - just use X!
// FIXME: We're limiting to integer types here, but this should also work
@@ -807,7 +807,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
SDValue NewSA =
TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
- EVT VT = Op.getValueType();
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
InOp.getOperand(0),
NewSA));
@@ -835,8 +834,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
TLO.DAG.getConstant(ShAmt, dl, ShTy));
return
TLO.CombineTo(Op,
- TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(),
- NarrowShl));
+ TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
}
// Repeat the SHL optimization above in cases where an extension
// intervenes: (shl (anyext (shr x, c1)), c2) to
@@ -854,7 +852,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
SDValue NewSA =
TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
Op.getOperand(1).getValueType());
- EVT VT = Op.getValueType();
SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
InnerOp.getOperand(0));
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
@@ -904,7 +901,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
SDValue NewSA =
TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
- EVT VT = Op.getValueType();
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
InOp.getOperand(0),
NewSA));
@@ -930,12 +926,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// the shift amount is >= the size of the datatype, which is undefined.
if (NewMask.isOneValue())
return TLO.CombineTo(Op,
- TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
- Op.getOperand(0), Op.getOperand(1)));
+ TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0),
+ Op.getOperand(1)));
if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
- EVT VT = Op.getValueType();
-
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
@@ -1000,14 +994,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (!AlreadySignExtended) {
// Compute the correct shift amount type, which must be getShiftAmountTy
// for scalar types after legalization.
- EVT ShiftAmtTy = Op.getValueType();
+ EVT ShiftAmtTy = VT;
if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
ShiftAmtTy);
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
- Op.getValueType(), InOp,
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, InOp,
ShiftAmt));
}
}
@@ -1072,8 +1065,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If none of the top bits are demanded, convert this into an any_extend.
if (NewMask.getActiveBits() <= OperandBitWidth)
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
- Op.getValueType(),
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
Op.getOperand(0)));
APInt InMask = NewMask.trunc(OperandBitWidth);
@@ -1089,8 +1081,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If none of the top bits are demanded, convert this into an any_extend.
if (NewMask.getActiveBits() <= InBits)
- return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
- Op.getValueType(),
+ return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
Op.getOperand(0)));
// Since some of the sign extended bits are demanded, we know that the sign
@@ -1107,8 +1098,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the sign bit is known zero, convert this to a zero extend.
if (Known.isNonNegative())
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
- Op.getValueType(),
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT,
Op.getOperand(0)));
break;
}
@@ -1139,8 +1129,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::SRL:
// Shrink SRL by a constant if none of the high bits shifted in are
// demanded.
- if (TLO.LegalTypes() &&
- !isTypeDesirableForOp(ISD::SRL, Op.getValueType()))
+ if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
// Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
// undesirable.
break;
@@ -1150,8 +1139,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
SDValue Shift = In.getOperand(1);
if (TLO.LegalTypes()) {
uint64_t ShVal = ShAmt->getZExtValue();
- Shift = TLO.DAG.getConstant(ShVal, dl,
- getShiftAmountTy(Op.getValueType(), DL));
+ Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
}
if (ShAmt->getZExtValue() < BitWidth) {
@@ -1163,12 +1151,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (!(HighBits & NewMask)) {
// None of the shifted in bits are needed. Add a truncate of the
// shift input, then shift it.
- SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
- Op.getValueType(),
+ SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, VT,
In.getOperand(0));
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
- Op.getValueType(),
- NewTrunc,
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc,
Shift));
}
}
@@ -1182,9 +1167,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::AssertZext: {
// AssertZext demands all of the high bits, plus any of the low bits
// demanded by its users.
- EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- APInt InMask = APInt::getLowBitsSet(BitWidth,
- VT.getSizeInBits());
+ EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask,
Known, TLO, Depth+1))
return true;
@@ -1196,40 +1180,45 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::BITCAST:
// If this is an FP->Int bitcast and if the sign bit is the only
// thing demanded, turn this into a FGETSIGN.
- if (!TLO.LegalOperations() &&
- !Op.getValueType().isVector() &&
+ if (!TLO.LegalOperations() && !VT.isVector() &&
!Op.getOperand(0).getValueType().isVector() &&
NewMask == APInt::getSignMask(Op.getValueSizeInBits()) &&
Op.getOperand(0).getValueType().isFloatingPoint()) {
- bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
+ bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
- if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple() &&
+ if ((OpVTLegal || i32Legal) && VT.isSimple() &&
+ Op.getOperand(0).getValueType() != MVT::f16 &&
Op.getOperand(0).getValueType() != MVT::f128) {
// Cannot eliminate/lower SHL for f128 yet.
- EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32;
+ EVT Ty = OpVTLegal ? VT : MVT::i32;
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
// place. We expect the SHL to be eliminated by other optimizations.
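+      // For example, (i64 bitcast (f64 X)) with only the sign bit demanded
+      // can become (shl (zext (i32 FGETSIGN X)), 63).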
SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0));
unsigned OpVTSizeInBits = Op.getValueSizeInBits();
if (!OpVTLegal && OpVTSizeInBits > 32)
- Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign);
+ Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
unsigned ShVal = Op.getValueSizeInBits() - 1;
- SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, Op.getValueType());
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
- Op.getValueType(),
- Sign, ShAmt));
+ SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
}
}
+ // If this is a bitcast, let computeKnownBits handle it. Only do this on a
+ // recursive call where Known may be useful to the caller.
+ if (Depth > 0) {
+ TLO.DAG.computeKnownBits(Op, Known, Depth);
+ return false;
+ }
break;
case ISD::ADD:
case ISD::MUL:
case ISD::SUB: {
// Add, Sub, and Mul don't demand any bits in positions beyond that
// of the highest bit demanded of them.
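+    // (The low k bits of an add, sub, or mul depend only on the low k bits
+    // of its operands, so e.g. demanding only the low 8 bits of a multiply
+    // demands at most the low 8 bits of each operand.)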
- APInt LoMask = APInt::getLowBitsSet(BitWidth,
- BitWidth - NewMask.countLeadingZeros());
- if (SimplifyDemandedBits(Op.getOperand(0), LoMask, Known2, TLO, Depth+1) ||
- SimplifyDemandedBits(Op.getOperand(1), LoMask, Known2, TLO, Depth+1) ||
+ SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
+ unsigned NewMaskLZ = NewMask.countLeadingZeros();
+ APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - NewMaskLZ);
+ if (SimplifyDemandedBits(Op0, LoMask, Known2, TLO, Depth + 1) ||
+ SimplifyDemandedBits(Op1, LoMask, Known2, TLO, Depth + 1) ||
// See if the operation should be performed at a smaller bit width.
ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) {
SDNodeFlags Flags = Op.getNode()->getFlags();
@@ -1238,13 +1227,33 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// won't wrap after simplification.
Flags.setNoSignedWrap(false);
Flags.setNoUnsignedWrap(false);
- SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, Op.getValueType(),
- Op.getOperand(0), Op.getOperand(1),
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
Flags);
return TLO.CombineTo(Op, NewOp);
}
return true;
}
+
+ // If we have a constant operand, we may be able to turn it into -1 if we
+ // do not demand the high bits. This can make the constant smaller to
+ // encode, allow more general folding, or match specialized instruction
+  // patterns (e.g., 'blsr' on x86). Don't bother changing 1 to -1 because that
+ // is probably not useful (and could be detrimental).
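+  // For example, if only the low 16 bits of i32 (add X, 0x0000FFFF) are
+  // demanded, the constant can become -1: the low 16 bits of an add depend
+  // only on the low 16 bits of its operands, and (and X, (add X, -1))
+  // matches the x86 'blsr' pattern.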
+ ConstantSDNode *C = isConstOrConstSplat(Op1);
+ APInt HighMask = APInt::getHighBitsSet(NewMask.getBitWidth(), NewMaskLZ);
+ if (C && !C->isAllOnesValue() && !C->isOne() &&
+ (C->getAPIntValue() | HighMask).isAllOnesValue()) {
+ SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
+ // We can't guarantee that the new math op doesn't wrap, so explicitly
+ // clear those flags to prevent folding with a potential existing node
+ // that has those flags set.
+ SDNodeFlags Flags;
+ Flags.setNoSignedWrap(false);
+ Flags.setNoUnsignedWrap(false);
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
+ return TLO.CombineTo(Op, NewOp);
+ }
+
LLVM_FALLTHROUGH;
}
default:
@@ -1265,10 +1274,384 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (C->isOpaque())
return false;
}
- return TLO.CombineTo(Op,
- TLO.DAG.getConstant(Known.One, dl, Op.getValueType()));
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
+ }
+
+ return false;
+}
+
+bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
+ const APInt &DemandedElts,
+ APInt &KnownUndef,
+ APInt &KnownZero,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+
+ bool Simplified =
+ SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
+ if (Simplified)
+ DCI.CommitTargetLoweringOpt(TLO);
+ return Simplified;
+}
+
+bool TargetLowering::SimplifyDemandedVectorElts(
+ SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef,
+ APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
+ bool AssumeSingleUse) const {
+ EVT VT = Op.getValueType();
+ APInt DemandedElts = DemandedEltMask;
+ unsigned NumElts = DemandedElts.getBitWidth();
+ assert(VT.isVector() && "Expected vector op");
+ assert(VT.getVectorNumElements() == NumElts &&
+ "Mask size mismatches value type element count!");
+
+ KnownUndef = KnownZero = APInt::getNullValue(NumElts);
+
+ // Undef operand.
+ if (Op.isUndef()) {
+ KnownUndef.setAllBits();
+ return false;
+ }
+
+ // If Op has other users, assume that all elements are needed.
+ if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
+ DemandedElts.setAllBits();
+
+ // Not demanding any elements from Op.
+ if (DemandedElts == 0) {
+ KnownUndef.setAllBits();
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+ }
+
+ // Limit search depth.
+ if (Depth >= 6)
+ return false;
+
+ SDLoc DL(Op);
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+
+ switch (Op.getOpcode()) {
+ case ISD::SCALAR_TO_VECTOR: {
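+    // SCALAR_TO_VECTOR defines only element 0; all higher elements are
+    // implicitly undef.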
+ if (!DemandedElts[0]) {
+ KnownUndef.setAllBits();
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+ }
+ KnownUndef.setHighBits(NumElts - 1);
+ break;
+ }
+ case ISD::BITCAST: {
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // We only handle vectors here.
+ // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
+ if (!SrcVT.isVector())
+ break;
+
+ // Fast handling of 'identity' bitcasts.
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ if (NumSrcElts == NumElts)
+ return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
+ KnownZero, TLO, Depth + 1);
+
+ APInt SrcZero, SrcUndef;
+ APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
+
+    // Bitcast from a 'large element' src vector to a 'small element' vector:
+    // we must demand a source element if any DemandedElt maps to it.
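+    // For example, for (v4i32 bitcast (v2i64 X)), demanding element 2 of the
+    // v4i32 result demands element 1 of X (Scale == 2).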
+ if ((NumElts % NumSrcElts) == 0) {
+ unsigned Scale = NumElts / NumSrcElts;
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i])
+ SrcDemandedElts.setBit(i / Scale);
+
+ if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
+ TLO, Depth + 1))
+ return true;
+
+      // If a src element is zero/undef, then all the output elements that map
+      // to it will be as well; only demanded elements are guaranteed correct.
+ for (unsigned i = 0; i != NumSrcElts; ++i) {
+ if (SrcDemandedElts[i]) {
+ if (SrcZero[i])
+ KnownZero.setBits(i * Scale, (i + 1) * Scale);
+ if (SrcUndef[i])
+ KnownUndef.setBits(i * Scale, (i + 1) * Scale);
+ }
+ }
+ }
+
+    // Bitcast from a 'small element' src vector to a 'large element' vector:
+    // we must demand all the smaller source elements covered by each larger
+    // demanded element of this vector.
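+    // For example, for (v2i64 bitcast (v4i32 X)), demanding element 1 of the
+    // v2i64 result demands elements 2 and 3 of X (Scale == 2).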
+ if ((NumSrcElts % NumElts) == 0) {
+ unsigned Scale = NumSrcElts / NumElts;
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i])
+ SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
+
+ if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
+ TLO, Depth + 1))
+ return true;
+
+ // If all the src elements covering an output element are zero/undef, then
+ // the output element will be as well, assuming it was demanded.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (DemandedElts[i]) {
+ if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
+ KnownZero.setBit(i);
+ if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
+ KnownUndef.setBit(i);
+ }
+ }
+ }
+ break;
+ }
+ case ISD::BUILD_VECTOR: {
+    // Check all elements and replace any unused ones with UNDEF.
+ if (!DemandedElts.isAllOnesValue()) {
+ // Don't simplify BROADCASTS.
+ if (llvm::any_of(Op->op_values(),
+ [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
+ SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
+ bool Updated = false;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i] && !Ops[i].isUndef()) {
+ Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
+ KnownUndef.setBit(i);
+ Updated = true;
+ }
+ }
+ if (Updated)
+ return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
+ }
+ }
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue SrcOp = Op.getOperand(i);
+ if (SrcOp.isUndef()) {
+ KnownUndef.setBit(i);
+ } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
+ (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
+ KnownZero.setBit(i);
+ }
+ }
+ break;
+ }
+ case ISD::CONCAT_VECTORS: {
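+    // Each operand provides a contiguous run of NumSubElts result elements,
+    // so simplify each subvector against its slice of the demanded mask.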
+ EVT SubVT = Op.getOperand(0).getValueType();
+ unsigned NumSubVecs = Op.getNumOperands();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ for (unsigned i = 0; i != NumSubVecs; ++i) {
+ SDValue SubOp = Op.getOperand(i);
+ APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
+ APInt SubUndef, SubZero;
+ if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
+ Depth + 1))
+ return true;
+ KnownUndef.insertBits(SubUndef, i * NumSubElts);
+ KnownZero.insertBits(SubZero, i * NumSubElts);
+ }
+ break;
+ }
+ case ISD::INSERT_SUBVECTOR: {
+ if (!isa<ConstantSDNode>(Op.getOperand(2)))
+ break;
+ SDValue Base = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ EVT SubVT = Sub.getValueType();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(2))->getAPIntValue();
+ if (Idx.uge(NumElts - NumSubElts))
+ break;
+ unsigned SubIdx = Idx.getZExtValue();
+ APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
+ APInt SubUndef, SubZero;
+ if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO,
+ Depth + 1))
+ return true;
+ APInt BaseElts = DemandedElts;
+ BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
+ if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO,
+ Depth + 1))
+ return true;
+ KnownUndef.insertBits(SubUndef, SubIdx);
+ KnownZero.insertBits(SubZero, SubIdx);
+ break;
+ }
+ case ISD::EXTRACT_SUBVECTOR: {
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ break;
+ SDValue Src = Op.getOperand(0);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
+ if (Idx.uge(NumSrcElts - NumElts))
+ break;
+ // Offset the demanded elts by the subvector index.
+ uint64_t SubIdx = Idx.getZExtValue();
+ APInt SrcElts = DemandedElts.zext(NumSrcElts).shl(SubIdx);
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
+ return true;
+ KnownUndef = SrcUndef.extractBits(NumElts, SubIdx);
+ KnownZero = SrcZero.extractBits(NumElts, SubIdx);
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Scl = Op.getOperand(1);
+ auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+
+ // For a legal, constant insertion index, if we don't need this insertion
+ // then strip it, else remove it from the demanded elts.
+ if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
+ unsigned Idx = CIdx->getZExtValue();
+ if (!DemandedElts[Idx])
+ return TLO.CombineTo(Op, Vec);
+ DemandedElts.clearBit(Idx);
+
+ if (SimplifyDemandedVectorElts(Vec, DemandedElts, KnownUndef,
+ KnownZero, TLO, Depth + 1))
+ return true;
+
+ KnownUndef.clearBit(Idx);
+ if (Scl.isUndef())
+ KnownUndef.setBit(Idx);
+
+ KnownZero.clearBit(Idx);
+ if (isNullConstant(Scl) || isNullFPConstant(Scl))
+ KnownZero.setBit(Idx);
+ break;
+ }
+
+ APInt VecUndef, VecZero;
+ if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
+ Depth + 1))
+ return true;
+ // Without knowing the insertion index we can't set KnownUndef/KnownZero.
+ break;
+ }
+ case ISD::VSELECT: {
+ APInt DemandedLHS(DemandedElts);
+ APInt DemandedRHS(DemandedElts);
+
+ // TODO - add support for constant vselect masks.
+
+ // See if we can simplify either vselect operand.
+ APInt UndefLHS, ZeroLHS;
+ APInt UndefRHS, ZeroRHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
+ ZeroLHS, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
+ ZeroRHS, TLO, Depth + 1))
+ return true;
+
+ KnownUndef = UndefLHS & UndefRHS;
+ KnownZero = ZeroLHS & ZeroRHS;
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
+
+    // Collect demanded elements from the shuffle operands.
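+    // For example, with NumElts == 4 and mask <0, 5, 1, 6>, demanding all
+    // result elements demands elements {0, 1} of the LHS and {1, 2} of the
+    // RHS.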
+ APInt DemandedLHS(NumElts, 0);
+ APInt DemandedRHS(NumElts, 0);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int M = ShuffleMask[i];
+ if (M < 0 || !DemandedElts[i])
+ continue;
+ assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
+ if (M < (int)NumElts)
+ DemandedLHS.setBit(M);
+ else
+ DemandedRHS.setBit(M - NumElts);
+ }
+
+ // See if we can simplify either shuffle operand.
+ APInt UndefLHS, ZeroLHS;
+ APInt UndefRHS, ZeroRHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
+ ZeroLHS, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
+ ZeroRHS, TLO, Depth + 1))
+ return true;
+
+ // Simplify mask using undef elements from LHS/RHS.
+ bool Updated = false;
+ bool IdentityLHS = true, IdentityRHS = true;
+ SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int &M = NewMask[i];
+ if (M < 0)
+ continue;
+ if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
+ (M >= (int)NumElts && UndefRHS[M - NumElts])) {
+ Updated = true;
+ M = -1;
+ }
+ IdentityLHS &= (M < 0) || (M == (int)i);
+ IdentityRHS &= (M < 0) || ((M - NumElts) == i);
+ }
+
+    // Update a legal shuffle mask based on demanded elements, unless the new
+    // mask would reduce to an identity, which can cause premature removal of
+    // the shuffle.
+ if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps &&
+ isShuffleMaskLegal(NewMask, VT))
+ return TLO.CombineTo(Op,
+ TLO.DAG.getVectorShuffle(VT, DL, Op.getOperand(0),
+ Op.getOperand(1), NewMask));
+
+ // Propagate undef/zero elements from LHS/RHS.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int M = ShuffleMask[i];
+ if (M < 0) {
+ KnownUndef.setBit(i);
+ } else if (M < (int)NumElts) {
+ if (UndefLHS[M])
+ KnownUndef.setBit(i);
+ if (ZeroLHS[M])
+ KnownZero.setBit(i);
+ } else {
+ if (UndefRHS[M - NumElts])
+ KnownUndef.setBit(i);
+ if (ZeroRHS[M - NumElts])
+ KnownZero.setBit(i);
+ }
+ }
+ break;
+ }
+ case ISD::ADD:
+ case ISD::SUB: {
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
+ SrcZero, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
+ KnownZero, TLO, Depth + 1))
+ return true;
+ KnownZero &= SrcZero;
+ KnownUndef &= SrcUndef;
+ break;
+ }
+ case ISD::TRUNCATE:
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
+ KnownZero, TLO, Depth + 1))
+ return true;
+ break;
+ default: {
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
+ if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
+ KnownZero, TLO, Depth))
+ return true;
+ break;
+ }
}
+ assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
return false;
}
@@ -1316,6 +1699,18 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
return 1;
}
+bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
+ SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
+ TargetLoweringOpt &TLO, unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use SimplifyDemandedVectorElts if you don't know whether Op"
+ " is a target node!");
+ return false;
+}
+
// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
// work with truncating build vectors and vectors with elements of less than
// 8 bits.
@@ -1353,16 +1748,6 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {
llvm_unreachable("Invalid boolean contents");
}
-SDValue TargetLowering::getConstTrueVal(SelectionDAG &DAG, EVT VT,
- const SDLoc &DL) const {
- unsigned ElementWidth = VT.getScalarSizeInBits();
- APInt TrueInt =
- getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent
- ? APInt(ElementWidth, 1)
- : APInt::getAllOnesValue(ElementWidth);
- return DAG.getConstant(TrueInt, DL, VT);
-}
-
bool TargetLowering::isConstFalseVal(const SDNode *N) const {
if (!N)
return false;
@@ -1466,6 +1851,89 @@ SDValue TargetLowering::simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
return SDValue();
}
+/// There are multiple IR patterns that could be checking whether a certain
+/// truncation of a signed number would be lossy or not. The pattern that is
+/// best at the IR level may not lower optimally, so we want to unfold it.
+/// We are looking for the following pattern (KeptBits is a constant):
+///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
+/// KeptBits won't be bitwidth(x); that case would have been constant-folded
+/// to true/false. KeptBits also can't be 1; that case would have been folded
+/// to %x dstcond 0.
+/// We will unfold it into the natural trunc+sext pattern:
+///   ((%x << C) a>> C) dstcond %x
+/// where C = bitwidth(x) - KeptBits and C u< bitwidth(x).
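+/// For example, with i8 %x and KeptBits == 4, (add %x, 8) u< 16 is true iff
+/// %x is in [-8, 7], i.e. iff %x survives truncation to 4 signed bits, and
+/// it unfolds to ((%x << 4) a>> 4) == %x.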
+SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
+ EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
+ const SDLoc &DL) const {
+ // We must be comparing with a constant.
+ ConstantSDNode *C1;
+ if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
+ return SDValue();
+
+ // N0 should be: add %x, (1 << (KeptBits-1))
+ if (N0->getOpcode() != ISD::ADD)
+ return SDValue();
+
+ // And we must be 'add'ing a constant.
+ ConstantSDNode *C01;
+ if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
+ return SDValue();
+
+ SDValue X = N0->getOperand(0);
+ EVT XVT = X.getValueType();
+
+ // Validate constants ...
+
+ APInt I1 = C1->getAPIntValue();
+
+ ISD::CondCode NewCond;
+ if (Cond == ISD::CondCode::SETULT) {
+ NewCond = ISD::CondCode::SETEQ;
+ } else if (Cond == ISD::CondCode::SETULE) {
+ NewCond = ISD::CondCode::SETEQ;
+ // But need to 'canonicalize' the constant.
+ I1 += 1;
+ } else if (Cond == ISD::CondCode::SETUGT) {
+ NewCond = ISD::CondCode::SETNE;
+ // But need to 'canonicalize' the constant.
+ I1 += 1;
+ } else if (Cond == ISD::CondCode::SETUGE) {
+ NewCond = ISD::CondCode::SETNE;
+ } else
+ return SDValue();
+
+ const APInt &I01 = C01->getAPIntValue();
+  // Both of them must be powers of two, and the constant from the setcc
+  // must be the bigger one.
+ if (!(I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2()))
+ return SDValue();
+
+  // They are powers of two, so which bit is set?
+ const unsigned KeptBits = I1.logBase2();
+ const unsigned KeptBitsMinusOne = I01.logBase2();
+
+ // Magic!
+ if (KeptBits != (KeptBitsMinusOne + 1))
+ return SDValue();
+ assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
+
+ // We don't want to do this in every single case.
+ SelectionDAG &DAG = DCI.DAG;
+ if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
+ XVT, KeptBits))
+ return SDValue();
+
+ const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
+ assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
+
+ // Unfold into: ((%x << C) a>> C) cond %x
+ // Where 'cond' will be either 'eq' or 'ne'.
+ SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
+ SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
+ SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
+ SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
+
+ return T2;
+}
+
/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
@@ -1473,25 +1941,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
DAGCombinerInfo &DCI,
const SDLoc &dl) const {
SelectionDAG &DAG = DCI.DAG;
+ EVT OpVT = N0.getValueType();
// These setcc operations always fold.
switch (Cond) {
default: break;
case ISD::SETFALSE:
- case ISD::SETFALSE2: return DAG.getConstant(0, dl, VT);
+ case ISD::SETFALSE2: return DAG.getBoolConstant(false, dl, VT, OpVT);
case ISD::SETTRUE:
- case ISD::SETTRUE2: {
- TargetLowering::BooleanContent Cnt =
- getBooleanContents(N0->getValueType(0));
- return DAG.getConstant(
- Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl,
- VT);
- }
+ case ISD::SETTRUE2: return DAG.getBoolConstant(true, dl, VT, OpVT);
}
// Ensure that the constant occurs on the RHS and fold constant comparisons.
+  // TODO: Handle non-splat vector constants. All-undef vectors cause trouble.
ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
- if (isa<ConstantSDNode>(N0.getNode()) &&
+ if (isConstOrConstSplat(N0) &&
(DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
@@ -1737,7 +2201,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
EVT newVT = N0.getOperand(0).getValueType();
if (DCI.isBeforeLegalizeOps() ||
(isOperationLegal(ISD::SETCC, newVT) &&
- getCondCodeAction(Cond, newVT.getSimpleVT()) == Legal)) {
+ isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
EVT NewSetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT);
SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
@@ -1867,8 +2331,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
+ if (SDValue V =
+ optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
+ return V;
+ }
+
+ // These simplifications apply to splat vectors as well.
+ // TODO: Handle more splat vector cases.
+ if (auto *N1C = isConstOrConstSplat(N1)) {
+ const APInt &C1 = N1C->getAPIntValue();
+
APInt MinVal, MaxVal;
- unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
+ unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
if (ISD::isSignedIntSetCC(Cond)) {
MinVal = APInt::getSignedMinValue(OperandBitSize);
MaxVal = APInt::getSignedMaxValue(OperandBitSize);
@@ -1881,84 +2355,105 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
// X >= MIN --> true
if (C1 == MinVal)
- return DAG.getConstant(1, dl, VT);
-
- // X >= C0 --> X > (C0 - 1)
- APInt C = C1 - 1;
- ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
- if ((DCI.isBeforeLegalizeOps() ||
- isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
- (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
- isLegalICmpImmediate(C.getSExtValue())))) {
- return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(C, dl, N1.getValueType()),
- NewCC);
+ return DAG.getBoolConstant(true, dl, VT, OpVT);
+
+ if (!VT.isVector()) { // TODO: Support this for vectors.
+ // X >= C0 --> X > (C0 - 1)
+ APInt C = C1 - 1;
+ ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
+ if ((DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
+ (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
+ isLegalICmpImmediate(C.getSExtValue())))) {
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C, dl, N1.getValueType()),
+ NewCC);
+ }
}
}
if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
// X <= MAX --> true
if (C1 == MaxVal)
- return DAG.getConstant(1, dl, VT);
+ return DAG.getBoolConstant(true, dl, VT, OpVT);
// X <= C0 --> X < (C0 + 1)
- APInt C = C1 + 1;
- ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
- if ((DCI.isBeforeLegalizeOps() ||
- isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
- (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
- isLegalICmpImmediate(C.getSExtValue())))) {
- return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(C, dl, N1.getValueType()),
- NewCC);
- }
- }
-
- if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
- return DAG.getConstant(0, dl, VT); // X < MIN --> false
- if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
- return DAG.getConstant(1, dl, VT); // X >= MIN --> true
- if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
- return DAG.getConstant(0, dl, VT); // X > MAX --> false
- if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
- return DAG.getConstant(1, dl, VT); // X <= MAX --> true
-
- // Canonicalize setgt X, Min --> setne X, Min
- if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
- // Canonicalize setlt X, Max --> setne X, Max
- if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
-
- // If we have setult X, 1, turn it into seteq X, 0
- if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
- return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(MinVal, dl, N0.getValueType()),
- ISD::SETEQ);
- // If we have setugt X, Max-1, turn it into seteq X, Max
- if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
- return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(MaxVal, dl, N0.getValueType()),
- ISD::SETEQ);
+ if (!VT.isVector()) { // TODO: Support this for vectors.
+ APInt C = C1 + 1;
+ ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
+ if ((DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
+ (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
+ isLegalICmpImmediate(C.getSExtValue())))) {
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C, dl, N1.getValueType()),
+ NewCC);
+ }
+ }
+ }
- // If we have "setcc X, C0", check to see if we can shrink the immediate
- // by changing cc.
+ if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
+ if (C1 == MinVal)
+ return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
+
+ // TODO: Support this for vectors after legalize ops.
+ if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
+ // Canonicalize setlt X, Max --> setne X, Max
+ if (C1 == MaxVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+
+ // If we have setult X, 1, turn it into seteq X, 0
+ if (C1 == MinVal+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MinVal, dl, N0.getValueType()),
+ ISD::SETEQ);
+ }
+ }
- // SETUGT X, SINTMAX -> SETLT X, 0
- if (Cond == ISD::SETUGT &&
- C1 == APInt::getSignedMaxValue(OperandBitSize))
- return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(0, dl, N1.getValueType()),
- ISD::SETLT);
+ if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
+ if (C1 == MaxVal)
+ return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
+
+ // TODO: Support this for vectors after legalize ops.
+ if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
+ // Canonicalize setgt X, Min --> setne X, Min
+ if (C1 == MinVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+
+ // If we have setugt X, Max-1, turn it into seteq X, Max
+ if (C1 == MaxVal-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MaxVal, dl, N0.getValueType()),
+ ISD::SETEQ);
+ }
+ }
- // SETULT X, SINTMIN -> SETGT X, -1
- if (Cond == ISD::SETULT &&
- C1 == APInt::getSignedMinValue(OperandBitSize)) {
- SDValue ConstMinusOne =
- DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
- N1.getValueType());
- return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
+ // If we have "setcc X, C0", check to see if we can shrink the immediate
+ // by changing cc.
+ // TODO: Support this for vectors after legalize ops.
+ if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
+ // SETUGT X, SINTMAX -> SETLT X, 0
+ if (Cond == ISD::SETUGT &&
+ C1 == APInt::getSignedMaxValue(OperandBitSize))
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(0, dl, N1.getValueType()),
+ ISD::SETLT);
+
+ // SETULT X, SINTMIN -> SETGT X, -1
+ if (Cond == ISD::SETULT &&
+ C1 == APInt::getSignedMinValue(OperandBitSize)) {
+ SDValue ConstMinusOne =
+ DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
+ N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
+ }
}
+ }
+
+ // Back to non-vector simplifications.
+ // TODO: Can we do these for vector splats?
+ if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
// Fold bit comparisons when we can.
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
@@ -1967,9 +2462,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getOpcode() == ISD::AND) {
auto &DL = DAG.getDataLayout();
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- EVT ShiftTy = DCI.isBeforeLegalize()
- ? getPointerTy(DL)
- : getShiftAmountTy(N0.getValueType(), DL);
+ EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
+ !DCI.isBeforeLegalize());
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
// Perform the xform if the AND RHS is a single bit.
if (AndRHS->getAPIntValue().isPowerOf2()) {
@@ -2001,9 +2495,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
unsigned ShiftBits = AndRHSC.countTrailingZeros();
auto &DL = DAG.getDataLayout();
- EVT ShiftTy = DCI.isBeforeLegalize()
- ? getPointerTy(DL)
- : getShiftAmountTy(N0.getValueType(), DL);
+ EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
+ !DCI.isBeforeLegalize());
EVT CmpTy = N0.getValueType();
SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
DAG.getConstant(ShiftBits, dl,
@@ -2033,9 +2526,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
isLegalICmpImmediate(NewC.getSExtValue())) {
auto &DL = DAG.getDataLayout();
- EVT ShiftTy = DCI.isBeforeLegalize()
- ? getPointerTy(DL)
- : getShiftAmountTy(N0.getValueType(), DL);
+ EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
+ !DCI.isBeforeLegalize());
EVT CmpTy = N0.getValueType();
SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
DAG.getConstant(ShiftBits, dl, ShiftTy));
@@ -2058,9 +2550,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
switch (ISD::getUnorderedFlavor(Cond)) {
default: llvm_unreachable("Unknown flavor!");
case 0: // Known false.
- return DAG.getConstant(0, dl, VT);
+ return DAG.getBoolConstant(false, dl, VT, OpVT);
case 1: // Known true.
- return DAG.getConstant(1, dl, VT);
+ return DAG.getBoolConstant(true, dl, VT, OpVT);
case 2: // Undefined.
return DAG.getUNDEF(VT);
}
@@ -2124,31 +2616,24 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (N0 == N1) {
// The sext(setcc()) => setcc() optimization relies on the appropriate
// constant being emitted.
- uint64_t EqVal = 0;
- switch (getBooleanContents(N0.getValueType())) {
- case UndefinedBooleanContent:
- case ZeroOrOneBooleanContent:
- EqVal = ISD::isTrueWhenEqual(Cond);
- break;
- case ZeroOrNegativeOneBooleanContent:
- EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0;
- break;
- }
+
+ bool EqTrue = ISD::isTrueWhenEqual(Cond);
// We can always fold X == X for integer setcc's.
- if (N0.getValueType().isInteger()) {
- return DAG.getConstant(EqVal, dl, VT);
- }
+ if (N0.getValueType().isInteger())
+ return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
+
unsigned UOF = ISD::getUnorderedFlavor(Cond);
if (UOF == 2) // FP operators that are undefined on NaNs.
- return DAG.getConstant(EqVal, dl, VT);
- if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
- return DAG.getConstant(EqVal, dl, VT);
+ return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
+ if (UOF == unsigned(EqTrue))
+ return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
// Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
// if it is not already.
ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
- if (NewCond != Cond && (DCI.isBeforeLegalizeOps() ||
- getCondCodeAction(NewCond, N0.getSimpleValueType()) == Legal))
+ if (NewCond != Cond &&
+ (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(NewCond, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N0, N1, NewCond);
}
@@ -2237,7 +2722,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
SDValue SH = DAG.getNode(
ISD::SHL, dl, N1.getValueType(), N1,
DAG.getConstant(1, dl,
- getShiftAmountTy(N1.getValueType(), DL)));
+ getShiftAmountTy(N1.getValueType(), DL,
+ !DCI.isBeforeLegalize())));
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(SH.getNode());
return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
@@ -2262,7 +2748,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// X == (Z-X) --> X<<1 == Z
SDValue SH = DAG.getNode(
ISD::SHL, dl, N1.getValueType(), N0,
- DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL)));
+ DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL,
+ !DCI.isBeforeLegalize())));
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(SH.getNode());
return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
@@ -2276,50 +2763,52 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Fold away ALL boolean setcc's.
SDValue Temp;
- if (N0.getValueType() == MVT::i1 && foldBooleans) {
+ if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
+ EVT OpVT = N0.getValueType();
switch (Cond) {
default: llvm_unreachable("Unknown integer setcc!");
case ISD::SETEQ: // X == Y -> ~(X^Y)
- Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
- N0 = DAG.getNOT(dl, Temp, MVT::i1);
+ Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
+ N0 = DAG.getNOT(dl, Temp, OpVT);
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(Temp.getNode());
break;
case ISD::SETNE: // X != Y --> (X^Y)
- N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+ N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
break;
case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
- Temp = DAG.getNOT(dl, N0, MVT::i1);
- N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp);
+ Temp = DAG.getNOT(dl, N0, OpVT);
+ N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(Temp.getNode());
break;
case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
- Temp = DAG.getNOT(dl, N1, MVT::i1);
- N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp);
+ Temp = DAG.getNOT(dl, N1, OpVT);
+ N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(Temp.getNode());
break;
case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
- Temp = DAG.getNOT(dl, N0, MVT::i1);
- N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp);
+ Temp = DAG.getNOT(dl, N0, OpVT);
+ N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(Temp.getNode());
break;
case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
- Temp = DAG.getNOT(dl, N1, MVT::i1);
- N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp);
+ Temp = DAG.getNOT(dl, N1, OpVT);
+ N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
break;
}
- if (VT != MVT::i1) {
+ if (VT.getScalarType() != MVT::i1) {
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(N0.getNode());
// FIXME: If running after legalize, we probably can't do this.
- N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0);
+ ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
+ N0 = DAG.getNode(ExtendCode, dl, VT, N0);
}
return N0;
}
@@ -2928,7 +3417,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
}
}
-/// \brief Given an exact SDIV by a constant, create a multiplication
+/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
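+/// For example, an exact i32 sdiv by 6 can become an arithmetic shift right
+/// by 1 followed by a multiply by 0xAAAAAAAB, the inverse of 3 modulo 2^32.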
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
const SDLoc &dl, SelectionDAG &DAG,
@@ -2970,7 +3459,7 @@ SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
return SDValue();
}
-/// \brief Given an ISD::SDIV node expressing a divide by constant,
+/// Given an ISD::SDIV node expressing a divide by a constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
@@ -3034,7 +3523,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
-/// \brief Given an ISD::UDIV node expressing a divide by constant,
+/// Given an ISD::UDIV node expressing a divide by a constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
@@ -3413,9 +3902,6 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
return DAG.getMergeValues({ Value, NewChain }, SL);
}
-// FIXME: This relies on each element having a byte size, otherwise the stride
-// is 0 and just overwrites the same location. ExpandStore currently expects
-// this broken behavior.
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
SelectionDAG &DAG) const {
SDLoc SL(ST);
@@ -3432,11 +3918,43 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
// The type of data as saved in memory.
EVT MemSclVT = StVT.getScalarType();
- // Store Stride in bytes
- unsigned Stride = MemSclVT.getSizeInBits() / 8;
EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
unsigned NumElem = StVT.getVectorNumElements();
+  // A vector must always be stored in memory as-is, i.e. without any padding
+  // between the elements, since various code depends on it, e.g. in the
+  // handling of a bitcast of a vector type to int, which may be done with a
+  // vector store followed by an integer load. A vector that does not have
+  // byte-sized elements must therefore be stored as an integer built out of
+  // the extracted vector elements.
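+  // For example, a store of v8i1 becomes a store of a single i8 built by
+  // truncating each element to i1, zero-extending it to i8, and OR'ing it
+  // in at its bit position.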
+ if (!MemSclVT.isByteSized()) {
+ unsigned NumBits = StVT.getSizeInBits();
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
+
+ SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
+
+ for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
+ DAG.getConstant(Idx, SL, IdxVT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
+ SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
+ unsigned ShiftIntoIdx =
+ (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
+ SDValue ShiftAmount =
+ DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
+ SDValue ShiftedElt =
+ DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
+ CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
+ }
+
+ return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
+ ST->getAlignment(), ST->getMemOperand()->getFlags(),
+ ST->getAAInfo());
+ }
+
+  // Store stride in bytes.
+  unsigned Stride = MemSclVT.getSizeInBits() / 8;
+  assert(Stride && "Zero stride!");
// Extract each of the elements from the original vector and save them into
// memory individually.
SmallVector<SDValue, 8> Stores;
@@ -3475,6 +3993,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
if (!isOperationLegalOrCustom(ISD::LOAD, intVT)) {
// Scalarize the load and let the individual components be handled.
SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
+ if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
+ return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1));
return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));
}