Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp  3368
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/FastISel.cpp  332
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp  75
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp  113
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h  34
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp  415
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp  479
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp  311
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp  62
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h  89
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp  58
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp  202
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp  687
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp  4
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp  2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp  4
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp  38
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h  2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp  1298
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp  1572
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h  19
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp  18
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp  90
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp  16
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp  429
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h  2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp  1486
27 files changed, 6683 insertions, 4522 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e5bc08b9280ab..f14b3dba4f318 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -30,6 +30,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -124,17 +125,29 @@ static cl::opt<unsigned> StoreMergeDependenceLimit(
cl::desc("Limit the number of times for the same StoreNode and RootNode "
"to bail out in store merging dependence check"));
+static cl::opt<bool> EnableReduceLoadOpStoreWidth(
+ "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
+ cl::desc("DAG cominber enable reducing the width of load/op/store "
+ "sequence"));
+
+static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
+ "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
+ cl::desc("DAG cominber enable load/<replace bytes>/store with "
+ "a narrower store"));
+
namespace {
class DAGCombiner {
SelectionDAG &DAG;
const TargetLowering &TLI;
+ const SelectionDAGTargetInfo *STI;
CombineLevel Level;
CodeGenOpt::Level OptLevel;
bool LegalDAG = false;
bool LegalOperations = false;
bool LegalTypes = false;
bool ForCodeSize;
+ bool DisableGenericCombines;
/// Worklist of all of the nodes that need to be simplified.
///
@@ -222,9 +235,11 @@ namespace {
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
- : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
- OptLevel(OL), AA(AA) {
+ : DAG(D), TLI(D.getTargetLoweringInfo()),
+ STI(D.getSubtarget().getSelectionDAGInfo()),
+ Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
ForCodeSize = DAG.shouldOptForSize();
+ DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
MaximumLegalStoreInBits = 0;
// We use the minimum store size here, since that's all we can guarantee
@@ -307,23 +322,34 @@ namespace {
}
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
- EVT VT = Op.getValueType();
- unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
- APInt DemandedElts = APInt::getAllOnesValue(NumElts);
- return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
+ TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
+ KnownBits Known;
+ if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
+ return false;
+
+ // Revisit the node.
+ AddToWorklist(Op.getNode());
+
+ CommitTargetLoweringOpt(TLO);
+ return true;
}
/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the
/// elements. If so, return true.
bool SimplifyDemandedVectorElts(SDValue Op) {
+ // TODO: For now just pretend it cannot be simplified.
+ if (Op.getValueType().isScalableVector())
+ return false;
+
unsigned NumElts = Op.getValueType().getVectorNumElements();
APInt DemandedElts = APInt::getAllOnesValue(NumElts);
return SimplifyDemandedVectorElts(Op, DemandedElts);
}
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
- const APInt &DemandedElts);
+ const APInt &DemandedElts,
+ bool AssumeSingleUse = false);
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
bool AssumeSingleUse = false);
@@ -429,11 +455,13 @@ namespace {
SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
SDValue visitAssertExt(SDNode *N);
+ SDValue visitAssertAlign(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
SDValue visitTRUNCATE(SDNode *N);
SDValue visitBITCAST(SDNode *N);
+ SDValue visitFREEZE(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
SDValue visitFADD(SDNode *N);
SDValue visitFSUB(SDNode *N);
@@ -522,9 +550,8 @@ namespace {
SDValue rebuildSetCC(SDValue N);
bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
- SDValue &CC) const;
+ SDValue &CC, bool MatchStrict = false) const;
bool isOneUseSetCC(SDValue N) const;
- bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y);
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
@@ -553,6 +580,10 @@ namespace {
SDValue InnerPos, SDValue InnerNeg,
unsigned PosOpcode, unsigned NegOpcode,
const SDLoc &DL);
+ SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
+ SDValue InnerPos, SDValue InnerNeg,
+ unsigned PosOpcode, unsigned NegOpcode,
+ const SDLoc &DL);
SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
SDValue MatchLoadCombine(SDNode *N);
SDValue MatchStoreCombine(StoreSDNode *N);
@@ -562,6 +593,7 @@ namespace {
SDValue TransformFPLoadStorePair(SDNode *N);
SDValue convertBuildVecZextToZext(SDNode *N);
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
+ SDValue reduceBuildVecTruncToBitCast(SDNode *N);
SDValue reduceBuildVecToShuffle(SDNode *N);
SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask, SDValue VecIn1,
@@ -606,6 +638,19 @@ namespace {
: MemNode(N), OffsetFromBase(Offset) {}
};
+ // Classify the origin of a stored value.
+ enum class StoreSource { Unknown, Constant, Extract, Load };
+ StoreSource getStoreSource(SDValue StoreVal) {
+ if (isa<ConstantSDNode>(StoreVal) || isa<ConstantFPSDNode>(StoreVal))
+ return StoreSource::Constant;
+ if (StoreVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ StoreVal.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ return StoreSource::Extract;
+ if (isa<LoadSDNode>(StoreVal))
+ return StoreSource::Load;
+ return StoreSource::Unknown;
+ }
+
/// This is a helper function for visitMUL to check the profitability
/// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
/// MulNode is the original multiply, AddNode is (add x, c1),
@@ -633,43 +678,66 @@ namespace {
/// can be combined into narrow loads.
bool BackwardsPropagateMask(SDNode *N);
- /// Helper function for MergeConsecutiveStores which merges the
- /// component store chains.
+ /// Helper function for mergeConsecutiveStores which merges the component
+ /// store chains.
SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumStores);
- /// This is a helper function for MergeConsecutiveStores. When the
- /// source elements of the consecutive stores are all constants or
- /// all extracted vector elements, try to merge them into one
- /// larger store introducing bitcasts if necessary. \return True
- /// if a merged store was created.
- bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
+ /// This is a helper function for mergeConsecutiveStores. When the source
+ /// elements of the consecutive stores are all constants or all extracted
+ /// vector elements, try to merge them into one larger store introducing
+ /// bitcasts if necessary. \return True if a merged store was created.
+ bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
EVT MemVT, unsigned NumStores,
bool IsConstantSrc, bool UseVector,
bool UseTrunc);
- /// This is a helper function for MergeConsecutiveStores. Stores
- /// that potentially may be merged with St are placed in
- /// StoreNodes. RootNode is a chain predecessor to all store
- /// candidates.
+ /// This is a helper function for mergeConsecutiveStores. Stores that
+ /// potentially may be merged with St are placed in StoreNodes. RootNode is
+ /// a chain predecessor to all store candidates.
void getStoreMergeCandidates(StoreSDNode *St,
SmallVectorImpl<MemOpLink> &StoreNodes,
SDNode *&Root);
- /// Helper function for MergeConsecutiveStores. Checks if
- /// candidate stores have indirect dependency through their
- /// operands. RootNode is the predecessor to all stores calculated
- /// by getStoreMergeCandidates and is used to prune the dependency check.
- /// \return True if safe to merge.
+ /// Helper function for mergeConsecutiveStores. Checks if candidate stores
+ /// have indirect dependency through their operands. RootNode is the
+ /// predecessor to all stores calculated by getStoreMergeCandidates and is
+ /// used to prune the dependency check. \return True if safe to merge.
bool checkMergeStoreCandidatesForDependencies(
SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
SDNode *RootNode);
+ /// This is a helper function for mergeConsecutiveStores. Given a list of
+ /// store candidates, find the first N that are consecutive in memory.
+ /// Returns 0 if there are not at least 2 consecutive stores to try merging.
+ unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
+ int64_t ElementSizeBytes) const;
+
+ /// This is a helper function for mergeConsecutiveStores. It is used for
+ /// store chains that are composed entirely of constant values.
+ bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumConsecutiveStores,
+ EVT MemVT, SDNode *Root, bool AllowVectors);
+
+ /// This is a helper function for mergeConsecutiveStores. It is used for
+ /// store chains that are composed entirely of extracted vector elements.
+ /// When extracting multiple vector elements, try to store them in one
+ /// vector store rather than a sequence of scalar stores.
+ bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumConsecutiveStores, EVT MemVT,
+ SDNode *Root);
+
+ /// This is a helper function for mergeConsecutiveStores. It is used for
+ /// store chains that are composed entirely of loaded values.
+ bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumConsecutiveStores, EVT MemVT,
+ SDNode *Root, bool AllowVectors,
+ bool IsNonTemporalStore, bool IsNonTemporalLoad);
+
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
- /// \return number of stores that were merged into a merged store (the
- /// affected nodes are stored as a prefix in \p StoreNodes).
- bool MergeConsecutiveStores(StoreSDNode *St);
+ /// \return true if stores were merged.
+ bool mergeConsecutiveStores(StoreSDNode *St);
/// Try to transform a truncation where C is a constant:
/// (trunc (and X, C)) -> (and (trunc X), (trunc C))
@@ -814,7 +882,7 @@ static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
- SDValue &CC) const {
+ SDValue &CC, bool MatchStrict) const {
if (N.getOpcode() == ISD::SETCC) {
LHS = N.getOperand(0);
RHS = N.getOperand(1);
@@ -822,6 +890,15 @@ bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
return true;
}
+ if (MatchStrict &&
+ (N.getOpcode() == ISD::STRICT_FSETCC ||
+ N.getOpcode() == ISD::STRICT_FSETCCS)) {
+ LHS = N.getOperand(1);
+ RHS = N.getOperand(2);
+ CC = N.getOperand(3);
+ return true;
+ }
+
if (N.getOpcode() != ISD::SELECT_CC ||
!TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
!TLI.isConstFalseVal(N.getOperand(3).getNode()))
@@ -886,6 +963,13 @@ static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}
+// Determine if this is an indexed load with an opaque target constant index.
+static bool canSplitIdx(LoadSDNode *LD) {
+ return MaySplitLoadIndex &&
+ (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
+ !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
+}
+
bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
const SDLoc &DL,
SDValue N0,
@@ -951,14 +1035,11 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
if (N0.getOpcode() != Opc)
return SDValue();
- // Don't reassociate reductions.
- if (N0->getFlags().hasVectorReduction())
- return SDValue();
-
- if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
- if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
// Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
- if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
+ if (SDValue OpNode =
+ DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
return SDValue();
}
@@ -978,9 +1059,6 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1, SDNodeFlags Flags) {
assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
- // Don't reassociate reductions.
- if (Flags.hasVectorReduction())
- return SDValue();
// Floating-point reassociation is not allowed without loose FP math.
if (N0.getValueType().isFloatingPoint() ||
@@ -1029,6 +1107,12 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+ // Replace the old value with the new one.
+ ++NodesCombined;
+ LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
+ dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
+ dbgs() << '\n');
+
// Replace all uses. If any nodes become isomorphic to other nodes and
// are deleted, make sure to remove them from our worklist.
WorklistRemover DeadNodes(*this);
@@ -1047,21 +1131,17 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
- const APInt &DemandedElts) {
+ const APInt &DemandedElts,
+ bool AssumeSingleUse) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
KnownBits Known;
- if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
+ if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
+ AssumeSingleUse))
return false;
// Revisit the node.
AddToWorklist(Op.getNode());
- // Replace the old value with the new one.
- ++NodesCombined;
- LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
- dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
- dbgs() << '\n');
-
CommitTargetLoweringOpt(TLO);
return true;
}
@@ -1081,12 +1161,6 @@ bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
// Revisit the node.
AddToWorklist(Op.getNode());
- // Replace the old value with the new one.
- ++NodesCombined;
- LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
- dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
- dbgs() << '\n');
-
CommitTargetLoweringOpt(TLO);
return true;
}
@@ -1210,8 +1284,11 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
SDValue RV =
DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
- // We are always replacing N0/N1's use in N and only need
- // additional replacements if there are additional uses.
+ // We are always replacing N0/N1's use in N and only need additional
+ // replacements if there are additional uses.
+ // Note: We are checking uses of the *nodes* (SDNode) rather than values
+ // (SDValue) here because the node may reference multiple values
+ // (for example, the chain value of a load node).
Replace0 &= !N0->hasOneUse();
Replace1 &= (N0 != N1) && !N1->hasOneUse();
@@ -1561,6 +1638,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
case ISD::AssertSext:
case ISD::AssertZext: return visitAssertExt(N);
+ case ISD::AssertAlign: return visitAssertAlign(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
@@ -1610,6 +1688,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::LIFETIME_END: return visitLIFETIME_END(N);
case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
+ case ISD::FREEZE: return visitFREEZE(N);
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
@@ -1628,7 +1707,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
}
SDValue DAGCombiner::combine(SDNode *N) {
- SDValue RV = visit(N);
+ SDValue RV;
+ if (!DisableGenericCombines)
+ RV = visit(N);
// If nothing happened, try a target-specific DAG combine.
if (!RV.getNode()) {
@@ -2046,12 +2127,11 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
// We need a constant operand for the add/sub, and the other operand is a
// logical shift right: add (srl), C or sub C, (srl).
-  // TODO - support non-uniform vector amounts.
bool IsAdd = N->getOpcode() == ISD::ADD;
SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
- ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
- if (!C || ShiftOp.getOpcode() != ISD::SRL)
+ if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
+ ShiftOp.getOpcode() != ISD::SRL)
return SDValue();
// The shift must be of a 'not' value.
@@ -2072,8 +2152,11 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
- APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
- return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
+ if (SDValue NewC =
+ DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
+ {ConstantOp, DAG.getConstant(1, DL, VT)}))
+ return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
+ return SDValue();
}
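
Aside (not part of the patch): the generalized fold above rests on the scalar
identity (srl (not x), bw-1) == (sra x, bw-1) + 1, so the add form becomes
add (sra x, 31), C+1 and the sub form becomes add (srl x, 31), C-1. A minimal
standalone sketch on plain 32-bit integers (unsigned wraparound assumed; the
names are illustrative, not DAGCombiner code):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = 42;
  for (uint32_t x : {0u, 1u, 0x7fffffffu, 0x80000000u, 0xdeadbeefu}) {
    uint32_t SrlNotX = (~x) >> 31;                         // srl (not x), 31
    uint32_t SraX = (x & 0x80000000u) ? 0xffffffffu : 0u;  // sra x, 31
    uint32_t SrlX = x >> 31;                               // srl x, 31
    assert(SrlNotX + C == SraX + (C + 1)); // add (srl (not x), 31), C
    assert(C - SrlNotX == SrlX + (C - 1)); // sub C, (srl (not x), 31)
  }
  return 0;
}
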
/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
@@ -2109,8 +2192,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
// fold (add c1, c2) -> c1+c2
- return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
- N1.getNode());
+ return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
}
// fold (add x, 0) -> x
@@ -2121,8 +2203,8 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// fold ((A-c1)+c2) -> (A+(c2-c1))
if (N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
- SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(),
- N0.getOperand(1).getNode());
+ SDValue Sub =
+ DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
assert(Sub && "Constant folding failed");
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
}
@@ -2130,8 +2212,8 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// fold ((c1-A)+c2) -> (c1+c2)-A
if (N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
- SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(),
- N0.getOperand(0).getNode());
+ SDValue Add =
+ DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
assert(Add && "Constant folding failed");
return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
}
@@ -2152,13 +2234,14 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
}
}
- // Undo the add -> or combine to merge constant offsets from a frame index.
+ // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
+ // equivalent to (add x, c0).
if (N0.getOpcode() == ISD::OR &&
- isa<FrameIndexSDNode>(N0.getOperand(0)) &&
- isa<ConstantSDNode>(N0.getOperand(1)) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
- SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
- return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
+ if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
+ {N1, N0.getOperand(1)}))
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
}
}
@@ -2317,6 +2400,23 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
DAG.haveNoCommonBitsSet(N0, N1))
return DAG.getNode(ISD::OR, DL, VT, N0, N1);
+ // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
+ if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
+ APInt C0 = N0->getConstantOperandAPInt(0);
+ APInt C1 = N1->getConstantOperandAPInt(0);
+ return DAG.getVScale(DL, VT, C0 + C1);
+ }
+
+ // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
+ if ((N0.getOpcode() == ISD::ADD) &&
+ (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
+ (N1.getOpcode() == ISD::VSCALE)) {
+ auto VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
+ auto VS1 = N1->getConstantOperandAPInt(0);
+ auto VS = DAG.getVScale(DL, VT, VS0 + VS1);
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
+ }
+
return SDValue();
}
@@ -2347,8 +2447,7 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) {
if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(Opcode, DL, VT, N1, N0);
// fold (add_sat c1, c2) -> c3
- return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
- N1.getNode());
+ return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
}
// fold (add_sat x, 0) -> x
@@ -2968,12 +3067,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// FIXME: Refactor this and xor and other similar operations together.
if (N0 == N1)
return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
- // fold (sub c1, c2) -> c1-c2
- return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
- N1.getNode());
- }
+
+ // fold (sub c1, c2) -> c3
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
+ return C;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -3040,8 +3137,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N0.getOpcode() == ISD::ADD &&
isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
- SDValue NewC = DAG.FoldConstantArithmetic(
- ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
+ SDValue NewC =
+ DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
assert(NewC && "Constant folding failed");
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
}
@@ -3051,8 +3148,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N11 = N1.getOperand(1);
if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
- SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
- N11.getNode());
+ SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
assert(NewC && "Constant folding failed");
return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
}
@@ -3062,8 +3158,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
- SDValue NewC = DAG.FoldConstantArithmetic(
- ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
+ SDValue NewC =
+ DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
assert(NewC && "Constant folding failed");
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
}
@@ -3072,8 +3168,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
- SDValue NewC = DAG.FoldConstantArithmetic(
- ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode());
+ SDValue NewC =
+ DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
assert(NewC && "Constant folding failed");
return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
}
@@ -3244,6 +3340,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
}
+ // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
+ if (N1.getOpcode() == ISD::VSCALE) {
+ APInt IntVal = N1.getConstantOperandAPInt(0);
+ return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
+ }
+
// Prefer an add for more folding potential and possibly better codegen:
// sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
@@ -3294,12 +3396,9 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
if (N0 == N1)
return DAG.getConstant(0, DL, VT);
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
- // fold (sub_sat c1, c2) -> c3
- return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
- N1.getNode());
- }
+ // fold (sub_sat c1, c2) -> c3
+ if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
+ return C;
// fold (sub_sat x, 0) -> x
if (isNullConstant(N1))
@@ -3435,30 +3534,20 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
- bool N0IsConst = false;
bool N1IsConst = false;
bool N1IsOpaqueConst = false;
- bool N0IsOpaqueConst = false;
- APInt ConstValue0, ConstValue1;
+ APInt ConstValue1;
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
- N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
- assert((!N0IsConst ||
- ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
- "Splat APInt should be element width");
assert((!N1IsConst ||
ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
"Splat APInt should be element width");
} else {
- N0IsConst = isa<ConstantSDNode>(N0);
- if (N0IsConst) {
- ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
- N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
- }
N1IsConst = isa<ConstantSDNode>(N1);
if (N1IsConst) {
ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
@@ -3467,17 +3556,18 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// fold (mul c1, c2) -> c1*c2
- if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
- return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
- N0.getNode(), N1.getNode());
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
+ return C;
// canonicalize constant to RHS (vector doesn't have to splat)
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
+
// fold (mul x, 0) -> 0
if (N1IsConst && ConstValue1.isNullValue())
return N1;
+
// fold (mul x, 1) -> x
if (N1IsConst && ConstValue1.isOneValue())
return N0;
@@ -3491,6 +3581,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), N0);
}
+
// fold (mul x, (1 << c)) -> x << c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1) &&
@@ -3501,6 +3592,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
}
+
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
unsigned Log2Val = (-ConstValue1).logBase2();
@@ -3589,6 +3681,14 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
DAG.getNode(ISD::MUL, SDLoc(N1), VT,
N0.getOperand(1), N1));
+ // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
+ if (N0.getOpcode() == ISD::VSCALE)
+ if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
+ APInt C0 = N0.getConstantOperandAPInt(0);
+ APInt C1 = NC1->getAPIntValue();
+ return DAG.getVScale(SDLoc(N), VT, C0 * C1);
+ }
+
// reassociate mul
if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
return RMUL;
@@ -3746,13 +3846,14 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDLoc DL(N);
// fold (sdiv c1, c2) -> c1/c2
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
- return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
+ return C;
+
// fold (sdiv X, -1) -> 0-X
if (N1C && N1C->isAllOnesValue())
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
+
// fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
if (N1C && N1C->getAPIntValue().isMinSignedValue())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
@@ -3890,12 +3991,10 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDLoc DL(N);
// fold (udiv c1, c2) -> c1/c2
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N0C && N1C)
- if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
- N0C, N1C))
- return Folded;
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
+ return C;
+
// fold (udiv X, -1) -> select(X == -1, 1, 0)
if (N1C && N1C->getAPIntValue().isAllOnesValue())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
@@ -3988,11 +4087,10 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
SDLoc DL(N);
// fold (rem c1, c2) -> c1%c2
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N0C && N1C)
- if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
- return Folded;
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
+ return C;
+
// fold (urem X, -1) -> select(X == -1, 0, x)
if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
@@ -4088,7 +4186,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
// If the type twice as wide is legal, transform the mulhs to a wider multiply
// plus a shift.
- if (VT.isSimple() && !VT.isVector()) {
+ if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
@@ -4144,7 +4242,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
// If the type twice as wide is legal, transform the mulhu to a wider multiply
// plus a shift.
- if (VT.isSimple() && !VT.isVector()) {
+ if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
@@ -4317,6 +4415,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ unsigned Opcode = N->getOpcode();
// fold vector ops
if (VT.isVector())
@@ -4324,19 +4423,16 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
return FoldedVOp;
// fold operation with constant operands.
- ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
- ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
- if (N0C && N1C)
- return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
+ return C;
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
// If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
// Only do this if the current op isn't legal and the flipped is.
- unsigned Opcode = N->getOpcode();
if (!TLI.isOperationLegal(Opcode, VT) &&
(N0.isUndef() || DAG.SignBitIsZero(N0)) &&
(N1.isUndef() || DAG.SignBitIsZero(N1))) {
@@ -4825,11 +4921,16 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
return false;
// Ensure that this isn't going to produce an unsupported memory access.
- if (ShAmt &&
- !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
- LDST->getAddressSpace(), ShAmt / 8,
- LDST->getMemOperand()->getFlags()))
- return false;
+ if (ShAmt) {
+ assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
+ const unsigned ByteShAmt = ShAmt / 8;
+ const Align LDSTAlign = LDST->getAlign();
+ const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
+ LDST->getAddressSpace(), NarrowAlign,
+ LDST->getMemOperand()->getFlags()))
+ return false;
+ }
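
Aside (not part of the patch): commonAlignment(LDSTAlign, ByteShAmt) is
essentially the largest power of two dividing both the original alignment and
the byte offset, i.e. the alignment that can still be guaranteed for the
narrowed access. A standalone sketch with an illustrative minAlign helper:

#include <cassert>
#include <cstdint>

// Largest power of two dividing both A and Offset (lowest set bit of A|Offset).
static uint64_t minAlign(uint64_t A, uint64_t Offset) {
  uint64_t V = A | Offset;
  return V & (~V + 1);
}

int main() {
  assert(minAlign(4, 0) == 4);  // offset 0 keeps the original alignment
  assert(minAlign(4, 2) == 2);  // 4-aligned access narrowed at byte offset 2
  assert(minAlign(8, 4) == 4);  // e.g. the upper half of an 8-aligned i64
  assert(minAlign(16, 6) == 2);
  return 0;
}
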
// It's not possible to generate a constant of extended or untyped type.
EVT PtrType = LDST->getBasePtr().getValueType();
@@ -5174,17 +5275,19 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
// fold (and c1, c2) -> c1&c2
- ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N0C && N1C && !N1C->isOpaque())
- return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
+ return C;
+
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
+
// fold (and x, -1) -> x
if (isAllOnesConstant(N1))
return N0;
+
// if (and x, c) is known to be zero, return 0
unsigned BitWidth = VT.getScalarSizeInBits();
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
@@ -5654,6 +5757,48 @@ static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
return false;
}
+// Match this pattern:
+// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
+// And rewrite this to:
+// (rotr (bswap A), 16)
+static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
+ SelectionDAG &DAG, SDNode *N, SDValue N0,
+ SDValue N1, EVT VT, EVT ShiftAmountTy) {
+ assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
+ "MatchBSwapHWordOrAndAnd: expecting i32");
+ if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+ return SDValue();
+ if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
+ return SDValue();
+ // TODO: this is too restrictive; lifting this restriction requires more tests
+ if (!N0->hasOneUse() || !N1->hasOneUse())
+ return SDValue();
+ ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
+ ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
+ if (!Mask0 || !Mask1)
+ return SDValue();
+ if (Mask0->getAPIntValue() != 0xff00ff00 ||
+ Mask1->getAPIntValue() != 0x00ff00ff)
+ return SDValue();
+ SDValue Shift0 = N0.getOperand(0);
+ SDValue Shift1 = N1.getOperand(0);
+ if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
+ return SDValue();
+ ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
+ ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
+ if (!ShiftAmt0 || !ShiftAmt1)
+ return SDValue();
+ if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
+ return SDValue();
+ if (Shift0.getOperand(0) != Shift1.getOperand(0))
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
+ SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
+ return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
+}
+
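
Aside (not part of the patch): the i32 identity this matcher relies on is
((x << 8) & 0xff00ff00) | ((x >> 8) & 0x00ff00ff) == rotr(bswap(x), 16). A
standalone check with illustrative helpers:

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t x) {
  return (x << 24) | ((x << 8) & 0x00ff0000u) | ((x >> 8) & 0x0000ff00u) |
         (x >> 24);
}
static uint32_t rotr32(uint32_t x, unsigned s) {
  return (x >> s) | (x << ((32 - s) & 31));
}

int main() {
  for (uint32_t x : {0x11223344u, 0xdeadbeefu, 0u, 0xffffffffu}) {
    uint32_t Pat = ((x << 8) & 0xff00ff00u) | ((x >> 8) & 0x00ff00ffu);
    assert(Pat == rotr32(bswap32(x), 16));
  }
  return 0;
}
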
/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
@@ -5670,6 +5815,16 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
return SDValue();
+ if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
+ getShiftAmountTy(VT)))
+ return BSwap;
+
+ // Try again with commuted operands.
+ if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
+ getShiftAmountTy(VT)))
+ return BSwap;
+
+
// Look for either
// (or (bswaphpair), (bswaphpair))
// (or (or (bswaphpair), (and)), (and))
@@ -5875,17 +6030,19 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
// fold (or c1, c2) -> c1|c2
- ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (N0C && N1C && !N1C->isOpaque())
- return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
+ return C;
+
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
+
// fold (or x, 0) -> x
if (isNullConstant(N1))
return N0;
+
// fold (or x, -1) -> -1
if (isAllOnesConstant(N1))
return N1;
@@ -5920,8 +6077,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
};
if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
- if (SDValue COR = DAG.FoldConstantArithmetic(
- ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
+ if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
+ {N1, N0.getOperand(1)})) {
SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
AddToWorklist(IOR.getNode());
return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
@@ -6020,6 +6177,7 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
// (add v v) -> (shl v 1)
+ // TODO: Should this be a general DAG canonicalization?
if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
ExtractFrom.getOpcode() == ISD::ADD &&
ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
@@ -6192,8 +6350,12 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
// EltSize & Mask == NegC & Mask
//
// (because "x & Mask" is a truncation and distributes through subtraction).
+ //
+ // We also need to account for a potential truncation of NegOp1 if the amount
+ // has already been legalized to a shift amount type.
APInt Width;
- if (Pos == NegOp1)
+ if ((Pos == NegOp1) ||
+ (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
Width = NegC->getAPIntValue();
// Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
@@ -6246,19 +6408,91 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
return SDValue();
}
+// A subroutine of MatchRotate used once we have found an OR of two opposite
+// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
+// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
+// former being preferred if supported. InnerPos and InnerNeg are Pos and
+// Neg with outer conversions stripped away.
+// TODO: Merge with MatchRotatePosNeg.
+SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
+ SDValue Neg, SDValue InnerPos,
+ SDValue InnerNeg, unsigned PosOpcode,
+ unsigned NegOpcode, const SDLoc &DL) {
+ EVT VT = N0.getValueType();
+ unsigned EltBits = VT.getScalarSizeInBits();
+
+ // fold (or (shl x0, (*ext y)),
+ // (srl x1, (*ext (sub 32, y)))) ->
+ // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
+ //
+ // fold (or (shl x0, (*ext (sub 32, y))),
+ // (srl x1, (*ext y))) ->
+ // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
+ if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) {
+ bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
+ return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
+ HasPos ? Pos : Neg);
+ }
+
+ // Matching the shift+xor cases, we can't easily use the xor'd shift amount
+  // so for now just use the PosOpcode case if it's legal.
+ // TODO: When can we use the NegOpcode case?
+ if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
+ auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
+ if (Op.getOpcode() != BinOpc)
+ return false;
+ ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
+ return Cst && (Cst->getAPIntValue() == Imm);
+ };
+
+ // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
+ // -> (fshl x0, x1, y)
+ if (IsBinOpImm(N1, ISD::SRL, 1) &&
+ IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
+ InnerPos == InnerNeg.getOperand(0) &&
+ TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
+ return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
+ }
+
+ // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
+ // -> (fshr x0, x1, y)
+ if (IsBinOpImm(N0, ISD::SHL, 1) &&
+ IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
+ InnerNeg == InnerPos.getOperand(0) &&
+ TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
+ return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
+ }
+
+ // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
+ // -> (fshr x0, x1, y)
+ // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
+ IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
+ InnerNeg == InnerPos.getOperand(0) &&
+ TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
+ return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
+ }
+ }
+
+ return SDValue();
+}
+
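
Aside (not part of the patch): the shift+xor case above works because, for a
power-of-two width, (x1 >> 1) >> (y ^ 31) equals x1 >> (32 - y) for y in
[1, 31] and is 0 for y == 0, which matches the fshl semantics. A standalone
sketch for the 32-bit case (fshl32 is an illustrative model of ISD::FSHL):

#include <cassert>
#include <cstdint>

static uint32_t fshl32(uint32_t Hi, uint32_t Lo, unsigned Amt) {
  Amt &= 31;
  return Amt ? (Hi << Amt) | (Lo >> (32 - Amt)) : Hi;
}

int main() {
  const uint32_t X0 = 0x12345678u, X1 = 0x9abcdef0u;
  for (unsigned y = 0; y < 32; ++y) {
    uint32_t Pat = (X0 << y) | ((X1 >> 1) >> (y ^ 31));
    assert(Pat == fshl32(X0, X1, y));
  }
  return 0;
}
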
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
-// a rot[lr].
+// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
+// with different shifted sources.
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// Must be a legal type. Expanded 'n promoted things won't work with rotates.
EVT VT = LHS.getValueType();
if (!TLI.isTypeLegal(VT))
return SDValue();
- // The target must have at least one rotate flavor.
+ // The target must have at least one rotate/funnel flavor.
bool HasROTL = hasOperation(ISD::ROTL, VT);
bool HasROTR = hasOperation(ISD::ROTR, VT);
- if (!HasROTL && !HasROTR)
+ bool HasFSHL = hasOperation(ISD::FSHL, VT);
+ bool HasFSHR = hasOperation(ISD::FSHR, VT);
+ if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
return SDValue();
// Check for truncated rotate.
@@ -6308,12 +6542,13 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// At this point we've matched or extracted a shift op on each side.
- if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
- return SDValue(); // Not shifting the same value.
-
if (LHSShift.getOpcode() == RHSShift.getOpcode())
return SDValue(); // Shifts must disagree.
+ bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
+ if (!IsRotate && !(HasFSHL || HasFSHR))
+ return SDValue(); // Requires funnel shift support.
+
// Canonicalize shl to left side in a shl/srl pair.
if (RHSShift.getOpcode() == ISD::SHL) {
std::swap(LHS, RHS);
@@ -6329,13 +6564,21 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
// fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
+ // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
+ // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
+ // iff C1+C2 == EltSizeInBits
auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
};
if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
- SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
- LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
+ SDValue Res;
+ if (IsRotate && (HasROTL || HasROTR))
+ Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
+ HasROTL ? LHSShiftAmt : RHSShiftAmt);
+ else
+ Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
+ RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
@@ -6353,10 +6596,10 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
}
- Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
+ Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
}
- return Rot;
+ return Res;
}
// If there is a mask here, and we have a variable shift, we can't be sure
@@ -6379,13 +6622,29 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
RExtOp0 = RHSShiftAmt.getOperand(0);
}
- SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
- LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
+ if (IsRotate && (HasROTL || HasROTR)) {
+ SDValue TryL =
+ MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
+ RExtOp0, ISD::ROTL, ISD::ROTR, DL);
+ if (TryL)
+ return TryL;
+
+ SDValue TryR =
+ MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
+ LExtOp0, ISD::ROTR, ISD::ROTL, DL);
+ if (TryR)
+ return TryR;
+ }
+
+ SDValue TryL =
+ MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
+ LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
if (TryL)
return TryL;
- SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
- RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
+ SDValue TryR =
+ MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
+ RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
if (TryR)
return TryR;
@@ -6610,9 +6869,9 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
return SDValue();
- // Check if all the bytes of the combined value we are looking at are stored
- // to the same base address. Collect bytes offsets from Base address into
- // ByteOffsets.
+ // Check if all the bytes of the combined value we are looking at are stored
+ // to the same base address. Collect bytes offsets from Base address into
+ // ByteOffsets.
SDValue CombinedValue;
SmallVector<int64_t, 8> ByteOffsets(Width, INT64_MAX);
int64_t FirstOffset = INT64_MAX;
@@ -6630,17 +6889,16 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
SDValue Value = Trunc.getOperand(0);
if (Value.getOpcode() == ISD::SRL ||
Value.getOpcode() == ISD::SRA) {
- ConstantSDNode *ShiftOffset =
- dyn_cast<ConstantSDNode>(Value.getOperand(1));
- // Trying to match the following pattern. The shift offset must be
+ auto *ShiftOffset = dyn_cast<ConstantSDNode>(Value.getOperand(1));
+ // Trying to match the following pattern. The shift offset must be
// a constant and a multiple of 8. It is the byte offset in "y".
- //
+ //
// x = srl y, offset
- // i8 z = trunc x
+ // i8 z = trunc x
// store z, ...
if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
return SDValue();
-
+
Offset = ShiftOffset->getSExtValue()/8;
Value = Value.getOperand(0);
}
@@ -6685,7 +6943,7 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
assert(FirstOffset != INT64_MAX && "First byte offset must be set");
assert(FirstStore && "First store must be set");
- // Check if the bytes of the combined value we are looking at match with
+ // Check if the bytes of the combined value we are looking at match with
// either big or little endian value store.
Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
if (!IsBigEndian.hasValue())
@@ -7030,20 +7288,22 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
SDLoc DL(N);
if (N0.isUndef() && N1.isUndef())
return DAG.getConstant(0, DL, VT);
+
// fold (xor x, undef) -> undef
if (N0.isUndef())
return N0;
if (N1.isUndef())
return N1;
+
// fold (xor c1, c2) -> c1^c2
- ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
- ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
- if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
+ return C;
+
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
+
// fold (xor x, 0) -> x
if (isNullConstant(N1))
return N0;
@@ -7058,7 +7318,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// fold !(x cc y) -> (x !cc y)
unsigned N0Opcode = N0.getOpcode();
SDValue LHS, RHS, CC;
- if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
+ if (TLI.isConstTrueVal(N1.getNode()) &&
+ isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
LHS.getValueType());
if (!LegalOperations ||
@@ -7071,6 +7332,21 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
case ISD::SELECT_CC:
return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
N0.getOperand(3), NotCC);
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: {
+ if (N0.hasOneUse()) {
+ // FIXME Can we handle multiple uses? Could we token factor the chain
+ // results from the new/old setcc?
+ SDValue SetCC = DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
+ N0.getOperand(0),
+ N0Opcode == ISD::STRICT_FSETCCS);
+ CombineTo(N, SetCC);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
+ recursivelyDeleteUnusedNodes(N0.getNode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ break;
+ }
}
}
}
@@ -7405,15 +7681,29 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
}
// fold (rot x, c) -> (rot x, c % BitSize)
-  // TODO - support non-uniform vector amounts.
- if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
- if (Cst->getAPIntValue().uge(Bitsize)) {
- uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
- return DAG.getNode(N->getOpcode(), dl, VT, N0,
- DAG.getConstant(RotAmt, dl, N1.getValueType()));
- }
+ bool OutOfRange = false;
+ auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
+ OutOfRange |= C->getAPIntValue().uge(Bitsize);
+ return true;
+ };
+ if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
+ EVT AmtVT = N1.getValueType();
+ SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
+ if (SDValue Amt =
+ DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
+ return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
}
+ // rot i16 X, 8 --> bswap X
+ auto *RotAmtC = isConstOrConstSplat(N1);
+ if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
+ VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
+ return DAG.getNode(ISD::BSWAP, dl, VT, N0);
+
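
Aside (not part of the patch): the two folds above are plain modular-rotation
facts: rotate amounts add modulo the bit width, and an i16 rotate by 8 swaps
the two bytes (bswap). A standalone sketch with an illustrative rotl32 helper:

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t x, unsigned s) { // s in [0, 31]
  return (x << s) | (x >> ((32 - s) & 31));
}

int main() {
  uint32_t x = 0xdeadbeefu;
  // A rotate by 40, built as two in-range rotates, equals a rotate by 40 % 32.
  assert(rotl32(rotl32(x, 20), 20) == rotl32(x, 40 % 32));

  uint16_t h = 0x1234;
  uint16_t Rot8 = (uint16_t)((h << 8) | (h >> 8));                          // rot i16 h, 8
  uint16_t Swapped = (uint16_t)(((h & 0x00ff) << 8) | ((h & 0xff00) >> 8)); // bswap h
  assert(Rot8 == Swapped);
  return 0;
}
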
+ // Simplify the operands using demanded-bits information.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
// fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
@@ -7430,12 +7720,11 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
EVT ShiftVT = C1->getValueType(0);
bool SameSide = (N->getOpcode() == NextOp);
unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
- if (SDValue CombinedShift =
- DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
+ if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
+ CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
- ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
- BitsizeC.getNode());
+ ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
CombinedShiftNorm);
}
@@ -7471,8 +7760,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrNegativeOneBooleanContent) {
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
- N01CV, N1CV))
+ if (SDValue C =
+ DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
}
}
@@ -7482,10 +7771,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (shl c1, c2) -> c1<<c2
-  // TODO - support non-uniform vector shift amounts.
- ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
- if (N0C && N1C && !N1C->isOpaque())
- return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
+ return C;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -7502,8 +7789,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
}
-  // TODO - support non-uniform vector shift amounts.
- if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
@@ -7691,9 +7977,90 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (SDValue NewSHL = visitShiftByConstant(N))
return NewSHL;
+ // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
+ if (N0.getOpcode() == ISD::VSCALE)
+ if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
+ auto DL = SDLoc(N);
+ APInt C0 = N0.getConstantOperandAPInt(0);
+ APInt C1 = NC1->getAPIntValue();
+ return DAG.getVScale(DL, VT, C0 << C1);
+ }
+
return SDValue();
}
+// Transform a right shift of a multiply into a multiply-high.
+// Examples:
+// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
+// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
+static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
+ "SRL or SRA node is required here!");
+
+ // Check the shift amount. Proceed with the transformation if the shift
+ // amount is constant.
+ ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
+ if (!ShiftAmtSrc)
+ return SDValue();
+
+ SDLoc DL(N);
+
+ // The operation feeding into the shift must be a multiply.
+ SDValue ShiftOperand = N->getOperand(0);
+ if (ShiftOperand.getOpcode() != ISD::MUL)
+ return SDValue();
+
+ // Both operands must be equivalent extend nodes.
+ SDValue LeftOp = ShiftOperand.getOperand(0);
+ SDValue RightOp = ShiftOperand.getOperand(1);
+ bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
+ bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
+
+ if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
+ return SDValue();
+
+ EVT WideVT1 = LeftOp.getValueType();
+ EVT WideVT2 = RightOp.getValueType();
+ (void)WideVT2;
+ // Proceed with the transformation if the wide types match.
+ assert((WideVT1 == WideVT2) &&
+ "Cannot have a multiply node with two different operand types.");
+
+ EVT NarrowVT = LeftOp.getOperand(0).getValueType();
+ // Check that the two extend nodes are the same type.
+ if (NarrowVT != RightOp.getOperand(0).getValueType())
+ return SDValue();
+
+ // Only transform into mulh if mulh for the narrow type is cheaper than
+ // a multiply followed by a shift. This should also check if mulh is
+ // legal for NarrowVT on the target.
+ if (!TLI.isMulhCheaperThanMulShift(NarrowVT))
+ return SDValue();
+
+ // Proceed with the transformation if the wide type is twice as large
+ // as the narrow type.
+ unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
+ if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
+ return SDValue();
+
+ // Check the shift amount with the narrow type size.
+ // Proceed with the transformation if the shift amount is the width
+ // of the narrow type.
+ unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
+ if (ShiftAmt != NarrowVTSize)
+ return SDValue();
+
+ // If the operation feeding into the MUL is a sign extend (sext),
+ // we use mulhs. Otherwise, zero extends (zext) use mulhu.
+ unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
+
+ SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
+ RightOp.getOperand(0));
+ return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
+ : DAG.getZExtOrTrunc(Result, DL, WideVT1));
+}
+
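// Standalone illustration of the identity behind combineShiftToMULH above
// (not part of this patch): the high half of a widened multiply equals the
// multiply-high of the narrow operands, in both the zero- and sign-extended
// forms. Uses only standard C++; the right shift of a negative value is
// arithmetic (well-defined since C++20, and in practice everywhere).
#include <cassert>
#include <cstdint>

static uint32_t mulhu32(uint32_t A, uint32_t B) {
  // (srl (mul (zext A), (zext B)), 32)
  return static_cast<uint32_t>((static_cast<uint64_t>(A) * B) >> 32);
}

static int32_t mulhs32(int32_t A, int32_t B) {
  // (sra (mul (sext A), (sext B)), 32)
  return static_cast<int32_t>((static_cast<int64_t>(A) * B) >> 32);
}

int main() {
  assert(mulhu32(0xFFFFFFFFu, 0xFFFFFFFFu) == 0xFFFFFFFEu);
  assert(mulhs32(-2, 3) == -1); // -6 sign-extended to 64 bits, shifted down
  return 0;
}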
SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -7717,10 +8084,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (sra c1, c2) -> (sra c1, c2)
- // TODO - support non-uniform vector shift amounts.
- ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
- if (N0C && N1C && !N1C->isOpaque())
- return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
+ return C;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -7811,7 +8176,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
// sra (add (shl X, N1C), AddC), N1C -->
// sext (add (trunc X to (width - N1C)), AddC')
- if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
+ if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
N0.getOperand(0).getOpcode() == ISD::SHL &&
N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
@@ -7828,7 +8193,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// implementation and/or target-specific overrides (because
// non-simple types likely require masking when legalized), but that
// restriction may conflict with other transforms.
- if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) {
+ if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
+ TLI.isTruncateFree(VT, TruncVT)) {
SDLoc DL(N);
SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
@@ -7871,8 +8237,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
}
// Simplify, based on bits shifted out of the LHS.
- // TODO - support non-uniform vector shift amounts.
- if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// If the sign bit is known to be zero, switch this to a SRL.
@@ -7883,6 +8248,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (SDValue NewSRA = visitShiftByConstant(N))
return NewSRA;
+ // Try to transform this shift into a multiply-high if
+ // it matches the appropriate pattern detected in combineShiftToMULH.
+ if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
+ return MULH;
+
return SDValue();
}
@@ -7903,10 +8273,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (srl c1, c2) -> c1 >>u c2
- // TODO - support non-uniform vector shift amounts.
- ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
- if (N0C && N1C && !N1C->isOpaque())
- return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
+ return C;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -8070,8 +8438,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold operands of srl based on knowledge that the low bits are not
// demanded.
- // TODO - support non-uniform vector shift amounts.
- if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
if (N1C && !N1C->isOpaque())
@@ -8111,6 +8478,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
}
+ // Try to transform this shift into a multiply-high if
+ // it matches the appropriate pattern detected in combineShiftToMULH.
+ if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
+ return MULH;
+
return SDValue();
}
@@ -8160,6 +8532,45 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
SDLoc(N), ShAmtTy));
+
+ // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
+ // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
+ // TODO - bigendian support once we have test coverage.
+ // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
+ // TODO - permit LHS EXTLOAD if extensions are shifted out.
+ if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
+ !DAG.getDataLayout().isBigEndian()) {
+ auto *LHS = dyn_cast<LoadSDNode>(N0);
+ auto *RHS = dyn_cast<LoadSDNode>(N1);
+ if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
+ LHS->getAddressSpace() == RHS->getAddressSpace() &&
+ (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
+ ISD::isNON_EXTLoad(LHS)) {
+ if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
+ SDLoc DL(RHS);
+ uint64_t PtrOff =
+ IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
+ Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
+ bool Fast = false;
+ if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ RHS->getAddressSpace(), NewAlign,
+ RHS->getMemOperand()->getFlags(), &Fast) &&
+ Fast) {
+ SDValue NewPtr =
+ DAG.getMemBasePlusOffset(RHS->getBasePtr(), PtrOff, DL);
+ AddToWorklist(NewPtr.getNode());
+ SDValue Load = DAG.getLoad(
+ VT, DL, RHS->getChain(), NewPtr,
+ RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
+ RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
+ // Replace the old load's chain with the new load's chain.
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
+ return Load;
+ }
+ }
+ }
+ }
}
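// Standalone check of the byte-offset math used by the fshl/fshr load fold
// above (not part of this patch). Assumes a little-endian host, mirroring the
// !isBigEndian() guard: funnel-shifting the higher-addressed load over the
// lower-addressed one by a byte-aligned amount equals a single load at offset
// ((BitWidth - ShAmt) % BitWidth) / 8 from the lower load's address.
#include <cassert>
#include <cstdint>
#include <cstring>

static uint64_t fshl64(uint64_t Hi, uint64_t Lo, unsigned C) {
  C %= 64;
  return C ? (Hi << C) | (Lo >> (64 - C)) : Hi;
}

int main() {
  unsigned char Buf[16];
  for (unsigned I = 0; I != 16; ++I)
    Buf[I] = static_cast<unsigned char>(I);
  uint64_t Lo, Hi; // ld0 at offset 0, ld1 at offset 8: consecutive i64 loads
  std::memcpy(&Lo, Buf, 8);
  std::memcpy(&Hi, Buf + 8, 8);

  const unsigned ShAmt = 24; // must be a multiple of 8 for the fold
  uint64_t Folded;
  std::memcpy(&Folded, Buf + (64 - ShAmt) / 8, 8); // the single replacement load
  assert(fshl64(Hi, Lo, ShAmt) == Folded);
  return 0;
}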
// fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
@@ -8609,7 +9020,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// Create the actual or node if we can generate good code for it.
if (!normalizeToSequence) {
SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
- return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
N2_2, Flags);
}
// Otherwise see if we can optimize to a better pattern.
@@ -8825,6 +9236,8 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
SDValue N2Elt = N2.getOperand(i);
if (N1Elt.isUndef() || N2Elt.isUndef())
continue;
+ if (N1Elt.getValueType() != N2Elt.getValueType())
+ continue;
const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
@@ -9395,8 +9808,7 @@ SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
N1.getOperand(1));
- APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- Mask = Mask.zext(VT.getSizeInBits());
+ APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
SDLoc DL0(N0);
SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
DAG.getConstant(Mask, DL0, VT));
@@ -9702,8 +10114,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
LN00->getChain(), LN00->getBasePtr(),
LN00->getMemoryVT(),
LN00->getMemOperand());
- APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- Mask = Mask.sext(VT.getSizeInBits());
+ APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
ExtLoad, DAG.getConstant(Mask, DL, VT));
ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
@@ -9941,7 +10352,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
SDValue Op = N0.getOperand(0);
- Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
+ Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
AddToWorklist(Op.getNode());
SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
// Transfer the debug info; the new node is equivalent to N0.
@@ -9953,7 +10364,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
AddToWorklist(Op.getNode());
- SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
+ SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
// We may safely transfer the debug info describing the truncate node over
// to the equivalent and operation.
DAG.transferDbgValues(N0, And);
@@ -9971,8 +10382,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
!TLI.isZExtFree(N0.getValueType(), VT))) {
SDValue X = N0.getOperand(0).getOperand(0);
X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
- APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- Mask = Mask.zext(VT.getSizeInBits());
+ APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
SDLoc DL(N);
return DAG.getNode(ISD::AND, DL, VT,
X, DAG.getConstant(Mask, DL, VT));
@@ -10026,8 +10436,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
LN00->getChain(), LN00->getBasePtr(),
LN00->getMemoryVT(),
LN00->getMemOperand());
- APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- Mask = Mask.zext(VT.getSizeInBits());
+ APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
SDLoc DL(N);
SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
ExtLoad, DAG.getConstant(Mask, DL, VT));
@@ -10080,23 +10489,22 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// that the element size of the sext'd result matches the element size of
// the compare operands.
SDLoc DL(N);
- SDValue VecOnes = DAG.getConstant(1, DL, VT);
if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
- // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
+ // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
N0.getOperand(1), N0.getOperand(2));
- return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
+ return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
}
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
- // truncate/sign extend.
+ // truncate/any extend followed by zext_in_reg.
EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
SDValue VsetCC =
DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
N0.getOperand(1), N0.getOperand(2));
- return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
- VecOnes);
+ return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
+ N0.getValueType());
}
// zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
@@ -10127,7 +10535,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDLoc DL(N);
// Ensure that the shift amount is wide enough for the shifted value.
- if (VT.getSizeInBits() >= 256)
+ if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
return DAG.getNode(N0.getOpcode(), DL, VT,
@@ -10187,8 +10595,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
SDLoc DL(N);
SDValue X = N0.getOperand(0).getOperand(0);
X = DAG.getAnyExtOrTrunc(X, DL, VT);
- APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- Mask = Mask.zext(VT.getSizeInBits());
+ APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
return DAG.getNode(ISD::AND, DL, VT,
X, DAG.getConstant(Mask, DL, VT));
}
@@ -10348,6 +10755,45 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
+ SDLoc DL(N);
+
+ Align AL = cast<AssertAlignSDNode>(N)->getAlign();
+ SDValue N0 = N->getOperand(0);
+
+ // Fold (assertalign (assertalign x, AL0), AL1) ->
+ // (assertalign x, max(AL0, AL1))
+ if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
+ return DAG.getAssertAlign(DL, N0.getOperand(0),
+ std::max(AL, AAN->getAlign()));
+
+ // In rare cases, there are trivial arithmetic ops in source operands. Sink
+ // this assert down to the source operands so that those arithmetic ops can
+ // be exposed to DAG combining.
+ switch (N0.getOpcode()) {
+ default:
+ break;
+ case ISD::ADD:
+ case ISD::SUB: {
+ unsigned AlignShift = Log2(AL);
+ SDValue LHS = N0.getOperand(0);
+ SDValue RHS = N0.getOperand(1);
+ unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
+ unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
+ if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
+ if (LHSAlignShift < AlignShift)
+ LHS = DAG.getAssertAlign(DL, LHS, AL);
+ if (RHSAlignShift < AlignShift)
+ RHS = DAG.getAssertAlign(DL, RHS, AL);
+ return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
+ }
+ break;
+ }
+ }
+
+ return SDValue();
+}
+
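// Standalone sketch of the alignment reasoning behind the new ADD/SUB case in
// visitAssertAlign (not part of this patch): if an operand of the add and the
// asserted sum are both AL-aligned, the other operand must be AL-aligned too,
// because values with at least Log2(AL) trailing zero bits are closed under
// addition and subtraction. __builtin_ctzll is a GCC/Clang builtin.
#include <cassert>
#include <cstdint>

static unsigned minTrailingZeros(uint64_t V) {
  return V ? static_cast<unsigned>(__builtin_ctzll(V)) : 64;
}

int main() {
  const unsigned AlignShift = 4; // an assertalign of 16 bytes
  const uint64_t Base = 0x1000, Off = 0x30; // both 16-byte aligned
  assert(minTrailingZeros(Base) >= AlignShift);
  assert(minTrailingZeros(Off) >= AlignShift);
  assert(minTrailingZeros(Base + Off) >= AlignShift); // alignment is preserved
  assert(minTrailingZeros(Base - Off) >= AlignShift); // and for SUB as well
  return 0;
}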
/// If the result of a wider load is shifted to right of N bits and then
/// truncated to a narrower type and where N is a multiple of number of bits of
/// the narrower type, transform it to a narrower load from address + N / num of
@@ -10428,9 +10874,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
}
// At this point, we must have a load or else we can't do the transform.
- if (!isa<LoadSDNode>(N0)) return SDValue();
-
- auto *LN0 = cast<LoadSDNode>(N0);
+ auto *LN0 = dyn_cast<LoadSDNode>(N0);
+ if (!LN0) return SDValue();
// Because a SRL must be assumed to *need* to zero-extend the high bits
// (as opposed to anyext the high bits), we can't combine the zextload
@@ -10449,8 +10894,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
SDNode *Mask = *(SRL->use_begin());
if (Mask->getOpcode() == ISD::AND &&
isa<ConstantSDNode>(Mask->getOperand(1))) {
- const APInt &ShiftMask =
- cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
+ const APInt &ShiftMask = Mask->getConstantOperandAPInt(1);
if (ShiftMask.isMask()) {
EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
ShiftMask.countTrailingOnes());
@@ -10480,7 +10924,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
// Reducing the width of a volatile load is illegal. For atomics, we may be
- // able to reduce the width provided we never widen again. (see D66309)
+ // able to reduce the width provided we never widen again. (see D66309)
if (!LN0->isSimple() ||
!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
return SDValue();
@@ -10561,26 +11005,27 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
- EVT EVT = cast<VTSDNode>(N1)->getVT();
+ EVT ExtVT = cast<VTSDNode>(N1)->getVT();
unsigned VTBits = VT.getScalarSizeInBits();
- unsigned EVTBits = EVT.getScalarSizeInBits();
+ unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
+ // sext_in_reg(undef) = 0 because the top bits will all be the same.
if (N0.isUndef())
- return DAG.getUNDEF(VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// fold (sext_in_reg c1) -> c1
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
// If the input is already sign extended, just drop the extension.
- if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
+ if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
return N0;
// fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
- EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
- N0.getOperand(0), N1);
+ ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
+ N1);
// fold (sext_in_reg (sext x)) -> (sext x)
// fold (sext_in_reg (aext x)) -> (sext x)
@@ -10589,8 +11034,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
unsigned N00Bits = N00.getScalarValueSizeInBits();
- if ((N00Bits <= EVTBits ||
- (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
+ if ((N00Bits <= ExtVTBits ||
+ (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
}
@@ -10599,7 +11044,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
- N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
+ N0.getOperand(0).getScalarValueSizeInBits() == ExtVTBits) {
if (!LegalOperations ||
TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
@@ -10610,14 +11055,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// iff we are extending the source sign bit.
if (N0.getOpcode() == ISD::ZERO_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getScalarValueSizeInBits() == EVTBits &&
+ if (N00.getScalarValueSizeInBits() == ExtVTBits &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
}
// fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
- if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
- return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
+ if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
+ return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
// fold operands of sext_in_reg based on knowledge that the top bits are not
// demanded.
@@ -10634,11 +11079,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
if (N0.getOpcode() == ISD::SRL) {
if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
- if (ShAmt->getAPIntValue().ule(VTBits - EVTBits)) {
+ if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
// We can turn this into an SRA iff the input to the SRL is already sign
// extended enough.
unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
- if (((VTBits - EVTBits) - ShAmt->getZExtValue()) < InSignBits)
+ if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
N0.getOperand(1));
}
@@ -10650,14 +11095,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// extends that the target does support.
if (ISD::isEXTLoad(N0.getNode()) &&
ISD::isUNINDEXEDLoad(N0.getNode()) &&
- EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
N0.hasOneUse()) ||
- TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
- LN0->getBasePtr(), EVT,
+ LN0->getBasePtr(), ExtVT,
LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
@@ -10667,13 +11112,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
N0.hasOneUse() &&
- EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
- TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
LN0->getChain(),
- LN0->getBasePtr(), EVT,
+ LN0->getBasePtr(), ExtVT,
LN0->getMemOperand());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
@@ -10681,11 +11126,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
}
// Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
- if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
+ if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
N0.getOperand(1), false))
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
- BSwap, N1);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
}
return SDValue();
@@ -10695,8 +11139,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // sext_vector_inreg(undef) = 0 because the top bits will all be the same.
if (N0.isUndef())
- return DAG.getUNDEF(VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
@@ -10711,8 +11156,9 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // zext_vector_inreg(undef) = 0 because the top bits will be zero.
if (N0.isUndef())
- return DAG.getUNDEF(VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
@@ -10788,13 +11234,12 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue EltNo = N0->getOperand(1);
if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
SDLoc DL(N);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
DAG.getBitcast(NVT, N0.getOperand(0)),
- DAG.getConstant(Index, DL, IndexTy));
+ DAG.getVectorIdxConstant(Index, DL));
}
}
@@ -10832,7 +11277,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// Attempt to pre-truncate BUILD_VECTOR sources.
if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
- TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
+ TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
+ // Avoid creating illegal types if running after type legalizer.
+ (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
SDLoc DL(N);
EVT SVT = VT.getScalarType();
SmallVector<SDValue, 8> TruncOps;
@@ -10961,10 +11408,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
SDLoc SL(N);
- EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
- DAG.getConstant(Idx, SL, IdxVT));
+ DAG.getVectorIdxConstant(Idx, SL));
}
}
@@ -11064,14 +11510,14 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
unsigned LD1Bytes = LD1VT.getStoreSize();
if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
- unsigned Align = LD1->getAlignment();
- unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
+ Align Alignment = LD1->getAlign();
+ Align NewAlign = DAG.getDataLayout().getABITypeAlign(
VT.getTypeForEVT(*DAG.getContext()));
- if (NewAlign <= Align &&
+ if (NewAlign <= Alignment &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
- LD1->getPointerInfo(), Align);
+ LD1->getPointerInfo(), Alignment);
}
return SDValue();
@@ -11389,6 +11835,20 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
return CombineConsecutiveLoads(N, VT);
}
+SDValue DAGCombiner::visitFREEZE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // (freeze (freeze x)) -> (freeze x)
+ if (N0.getOpcode() == ISD::FREEZE)
+ return N0;
+
+ // If the input is a constant, return it.
+ if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0))
+ return N0;
+
+ return SDValue();
+}
+
/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands. DstEltVT indicates the destination element value type.
SDValue DAGCombiner::
@@ -11519,7 +11979,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
// Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N));
+ bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
@@ -11532,13 +11992,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDNodeFlags Flags = N->getFlags();
bool CanFuse = Options.UnsafeFPMath || isContractable(N);
+ bool CanReassociate =
+ Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
CanFuse || HasFMAD);
// If the addition is not contractable, do not combine.
if (!AllowFusionGlobally && !isContractable(N))
return SDValue();
- const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
return SDValue();
@@ -11573,6 +12034,30 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N1.getOperand(0), N1.getOperand(1), N0, Flags);
}
+ // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
+ // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
+ // This requires reassociation because it changes the order of operations.
+ SDValue FMA, E;
+ if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&
+ N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
+ N0.getOperand(2).hasOneUse()) {
+ FMA = N0;
+ E = N1;
+ } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
+ N1.getOperand(2).hasOneUse()) {
+ FMA = N1;
+ E = N0;
+ }
+ if (FMA && E) {
+ SDValue A = FMA.getOperand(0);
+ SDValue B = FMA.getOperand(1);
+ SDValue C = FMA.getOperand(2).getOperand(0);
+ SDValue D = FMA.getOperand(2).getOperand(1);
+ SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E, Flags);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE, Flags);
+ }
+
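// Standalone illustration of the reassociating fold above (not part of this
// patch): fadd (fma A, B, (fmul C, D)), E is rewritten to
// fma A, B, (fma C, D, E). The two forms are mathematically equal but round
// differently, which is why the fold is gated on the reassociation flag or
// UnsafeFPMath.
#include <cmath>
#include <cstdio>

int main() {
  const double A = 1.5, B = 2.0, C = 3.0, D = 4.0, E = 5.0;
  double Before = std::fma(A, B, C * D) + E;        // original order
  double After = std::fma(A, B, std::fma(C, D, E)); // reassociated form
  std::printf("%.17g %.17g\n", Before, After); // equal here, may differ by an ulp
  return 0;
}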
// Look through FP_EXTEND nodes to do more combining.
// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
@@ -11606,33 +12091,6 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// More folding opportunities when target permits.
if (Aggressive) {
- // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
- if (CanFuse &&
- N0.getOpcode() == PreferredFusedOpcode &&
- N0.getOperand(2).getOpcode() == ISD::FMUL &&
- N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(2).getOperand(0),
- N0.getOperand(2).getOperand(1),
- N1, Flags), Flags);
- }
-
- // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
- if (CanFuse &&
- N1->getOpcode() == PreferredFusedOpcode &&
- N1.getOperand(2).getOpcode() == ISD::FMUL &&
- N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N1.getOperand(0), N1.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- N1.getOperand(2).getOperand(0),
- N1.getOperand(2).getOperand(1),
- N0, Flags), Flags);
- }
-
-
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), z))
auto FoldFAddFMAFPExtFMul = [&] (
@@ -11736,7 +12194,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
// Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N));
+ bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
@@ -11756,13 +12214,13 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (!AllowFusionGlobally && !isContractable(N))
return SDValue();
- const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
return SDValue();
// Always prefer FMAD to FMA for precision.
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+ bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.
@@ -11773,19 +12231,43 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
};
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
- if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
- }
+ auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
+ if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
+ XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z),
+ Flags);
+ }
+ return SDValue();
+ };
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
// Note: Commutes FSUB operands.
- if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- N1.getOperand(0)),
- N1.getOperand(1), N0, Flags);
+ auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
+ if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
+ YZ.getOperand(1), X, Flags);
+ }
+ return SDValue();
+ };
+
+ // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
+ (N0.getNode()->use_size() > N1.getNode()->use_size())) {
+ // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
+ if (SDValue V = tryToFoldXSubYZ(N0, N1))
+ return V;
+ // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
+ if (SDValue V = tryToFoldXYSubZ(N0, N1))
+ return V;
+ } else {
+ // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+ if (SDValue V = tryToFoldXYSubZ(N0, N1))
+ return V;
+ // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+ if (SDValue V = tryToFoldXSubYZ(N0, N1))
+ return V;
}
// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
@@ -11902,7 +12384,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// -> (fma (fneg y), z, (fma (fneg u), v, x))
if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
isContractableFMUL(N1.getOperand(2)) &&
- N1->hasOneUse()) {
+ N1->hasOneUse() && NoSignedZero) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -12055,7 +12537,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
// Floating-point multiply-add with intermediate rounding. This can result
// in a less precise result due to the changed rounding order.
bool HasFMAD = Options.UnsafeFPMath &&
- (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+ (LegalOperations && TLI.isFMADLegal(DAG, N));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
@@ -12132,6 +12614,9 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags Flags = N->getFlags();
+ if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
+ return R;
+
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
@@ -12155,18 +12640,16 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return NewSel;
// fold (fadd A, (fneg B)) -> (fsub A, B)
- if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize) == 2)
- return DAG.getNode(
- ISD::FSUB, DL, VT, N0,
- TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
+ if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
+ N1, DAG, LegalOperations, ForCodeSize))
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1, Flags);
// fold (fadd (fneg A), B) -> (fsub B, A)
- if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize) == 2)
- return DAG.getNode(
- ISD::FSUB, DL, VT, N1,
- TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags);
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
+ if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
+ N0, DAG, LegalOperations, ForCodeSize))
+ return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0, Flags);
auto isFMulNegTwo = [](SDValue FMul) {
if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
@@ -12311,6 +12794,9 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags Flags = N->getFlags();
+ if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
+ return R;
+
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
@@ -12345,8 +12831,9 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (N0CFP && N0CFP->isZero()) {
if (N0CFP->isNegative() ||
(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
- if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize))
- return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+ if (SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
+ return NegN1;
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
}
@@ -12364,10 +12851,9 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
- if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize))
- return DAG.getNode(
- ISD::FADD, DL, VT, N0,
- TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
+ if (SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
+ return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1, Flags);
// FSUB -> FMA combines:
if (SDValue Fused = visitFSUBForFMACombine(N)) {
@@ -12378,21 +12864,6 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
return SDValue();
}
-/// Return true if both inputs are at least as cheap in negated form and at
-/// least one input is strictly cheaper in negated form.
-bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) {
- if (char LHSNeg =
- TLI.isNegatibleForFree(X, DAG, LegalOperations, ForCodeSize))
- if (char RHSNeg =
- TLI.isNegatibleForFree(Y, DAG, LegalOperations, ForCodeSize))
- // Both negated operands are at least as cheap as their counterparts.
- // Check to see if at least one is cheaper negated.
- if (LHSNeg == 2 || RHSNeg == 2)
- return true;
-
- return false;
-}
-
SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -12403,6 +12874,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags Flags = N->getFlags();
+ if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
+ return R;
+
// fold vector ops
if (VT.isVector()) {
// This just handles C1 * C2 for vectors. Other vector folds are below.
@@ -12464,13 +12938,18 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
return DAG.getNode(ISD::FNEG, DL, VT, N0);
// -N0 * -N1 --> N0 * N1
- if (isCheaperToUseNegatedFPOps(N0, N1)) {
- SDValue NegN0 =
- TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
- SDValue NegN1 =
- TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+ TargetLowering::NegatibleCost CostN0 =
+ TargetLowering::NegatibleCost::Expensive;
+ TargetLowering::NegatibleCost CostN1 =
+ TargetLowering::NegatibleCost::Expensive;
+ SDValue NegN0 =
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
+ if (NegN0 && NegN1 &&
+ (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
+ CostN1 == TargetLowering::NegatibleCost::Cheaper))
return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
- }
// fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
// fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
@@ -12549,13 +13028,18 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
}
// (-N0 * -N1) + N2 --> (N0 * N1) + N2
- if (isCheaperToUseNegatedFPOps(N0, N1)) {
- SDValue NegN0 =
- TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
- SDValue NegN1 =
- TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+ TargetLowering::NegatibleCost CostN0 =
+ TargetLowering::NegatibleCost::Expensive;
+ TargetLowering::NegatibleCost CostN1 =
+ TargetLowering::NegatibleCost::Expensive;
+ SDValue NegN0 =
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
+ if (NegN0 && NegN1 &&
+ (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
+ CostN1 == TargetLowering::NegatibleCost::Cheaper))
return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
- }
if (UnsafeFPMath) {
if (N0CFP && N0CFP->isZero())
@@ -12641,13 +13125,10 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
// fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
// fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
- if (!TLI.isFNegFree(VT) &&
- TLI.isNegatibleForFree(SDValue(N, 0), DAG, LegalOperations,
- ForCodeSize) == 2)
- return DAG.getNode(ISD::FNEG, DL, VT,
- TLI.getNegatedExpression(SDValue(N, 0), DAG,
- LegalOperations, ForCodeSize),
- Flags);
+ if (!TLI.isFNegFree(VT))
+ if (SDValue Neg = TLI.getCheaperNegatedExpression(
+ SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
+ return DAG.getNode(ISD::FNEG, DL, VT, Neg, Flags);
return SDValue();
}
@@ -12664,7 +13145,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
// that only minsize should restrict this.
bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
const SDNodeFlags Flags = N->getFlags();
- if (!UnsafeMath && !Flags.hasAllowReciprocal())
+ if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
return SDValue();
// Skip if current node is a reciprocal/fneg-reciprocal.
@@ -12735,6 +13216,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
SDNodeFlags Flags = N->getFlags();
+ if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
+ return R;
+
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
@@ -12794,37 +13278,62 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
} else if (N1.getOpcode() == ISD::FMUL) {
// Look through an FMUL. Even though this won't remove the FDIV directly,
// it's still worthwhile to get rid of the FSQRT if possible.
- SDValue SqrtOp;
- SDValue OtherOp;
+ SDValue Sqrt, Y;
if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- SqrtOp = N1.getOperand(0);
- OtherOp = N1.getOperand(1);
+ Sqrt = N1.getOperand(0);
+ Y = N1.getOperand(1);
} else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
- SqrtOp = N1.getOperand(1);
- OtherOp = N1.getOperand(0);
+ Sqrt = N1.getOperand(1);
+ Y = N1.getOperand(0);
}
- if (SqrtOp.getNode()) {
+ if (Sqrt.getNode()) {
+ // If the other multiply operand is known positive, pull it into the
+ // sqrt. That will eliminate the division if we convert to an estimate:
+ // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
+ // TODO: Also fold the case where A == Z (fabs is missing).
+ if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
+ N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse() &&
+ Y.getOpcode() == ISD::FABS && Y.hasOneUse()) {
+ SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, Y.getOperand(0),
+ Y.getOperand(0), Flags);
+ SDValue AAZ =
+ DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0), Flags);
+ if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt, Flags);
+
+ // Estimate creation failed. Clean up speculatively created nodes.
+ recursivelyDeleteUnusedNodes(AAZ.getNode());
+ }
+
// We found a FSQRT, so try to make this fold:
- // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
- if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
- RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
- AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
+ // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
+ if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
+ SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y, Flags);
+ AddToWorklist(Div.getNode());
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, Div, Flags);
}
}
}
// Fold into a reciprocal estimate and multiply instead of a real divide.
- if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
- return RV;
+ if (Options.NoInfsFPMath || Flags.hasNoInfs())
+ if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
+ return RV;
}
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
- if (isCheaperToUseNegatedFPOps(N0, N1))
- return DAG.getNode(
- ISD::FDIV, SDLoc(N), VT,
- TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize),
- TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
+ TargetLowering::NegatibleCost CostN0 =
+ TargetLowering::NegatibleCost::Expensive;
+ TargetLowering::NegatibleCost CostN1 =
+ TargetLowering::NegatibleCost::Expensive;
+ SDValue NegN0 =
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
+ if (NegN0 && NegN1 &&
+ (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
+ CostN1 == TargetLowering::NegatibleCost::Cheaper))
+ return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1, Flags);
return SDValue();
}
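// Standalone numeric check of the FDIV rewrites above (not part of this
// patch). For finite values fabs(A) * sqrt(Z) == sqrt(A*A*Z), so
// X / (fabs(A) * sqrt(Z)) can become X * rsqrt(A*A*Z); more generally
// X / (Y * sqrt(Z)) becomes X * (rsqrt(Z) / Y) once an rsqrt estimate exists.
// 1.0 / std::sqrt stands in for the target's rsqrt estimate here.
#include <cmath>
#include <cstdio>

int main() {
  const double X = 10.0, A = -3.0, Z = 2.0, Y = 4.0;
  double Div1 = X / (std::fabs(A) * std::sqrt(Z));
  double Fold1 = X * (1.0 / std::sqrt(A * A * Z));
  double Div2 = X / (Y * std::sqrt(Z));
  double Fold2 = X * ((1.0 / std::sqrt(Z)) / Y);
  std::printf("%.17g %.17g\n%.17g %.17g\n", Div1, Fold1, Div2, Fold2);
  return 0;
}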
@@ -12835,6 +13344,10 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ SDNodeFlags Flags = N->getFlags();
+
+ if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
+ return R;
// fold (frem c1, c2) -> fmod(c1,c2)
if (N0CFP && N1CFP)
@@ -12848,8 +13361,12 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
SDNodeFlags Flags = N->getFlags();
- if (!DAG.getTarget().Options.UnsafeFPMath &&
- !Flags.hasApproximateFuncs())
+ const TargetOptions &Options = DAG.getTarget().Options;
+
+ // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
+ // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
+ if ((!Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) ||
+ (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
return SDValue();
SDValue N0 = N->getOperand(0);
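// Standalone illustration of why the FSQRT estimate now also requires 'ninf'
// (not part of this patch): the estimate computes sqrt(x) as rsqrt(x) * x,
// and for x == +Inf that is 0 * Inf == NaN, whereas the exact sqrt(+Inf) is
// +Inf.
#include <cmath>
#include <cstdio>
#include <limits>

int main() {
  const double Inf = std::numeric_limits<double>::infinity();
  double Exact = std::sqrt(Inf);                  // +inf
  double Estimate = (1.0 / std::sqrt(Inf)) * Inf; // 0 * inf == nan
  std::printf("exact=%f estimate=%f\n", Exact, Estimate);
  return 0;
}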
@@ -13061,33 +13578,24 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
}
// The next optimizations are desirable only if SELECT_CC can be lowered.
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
- // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
- if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
- !VT.isVector() &&
- (!LegalOperations ||
- TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
- SDLoc DL(N);
- SDValue Ops[] =
- { N0.getOperand(0), N0.getOperand(1),
- DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
- N0.getOperand(2) };
- return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
- }
+ // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
+ if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
+ !VT.isVector() &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
+ return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
+ DAG.getConstantFP(0.0, DL, VT));
+ }
- // fold (sint_to_fp (zext (setcc x, y, cc))) ->
- // (select_cc x, y, 1.0, 0.0,, cc)
- if (N0.getOpcode() == ISD::ZERO_EXTEND &&
- N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
- (!LegalOperations ||
- TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
- SDLoc DL(N);
- SDValue Ops[] =
- { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
- DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
- N0.getOperand(0).getOperand(2) };
- return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
- }
+ // fold (sint_to_fp (zext (setcc x, y, cc))) ->
+ // (select (setcc x, y, cc), 1.0, 0.0)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
+ return DAG.getSelect(DL, VT, N0.getOperand(0),
+ DAG.getConstantFP(1.0, DL, VT),
+ DAG.getConstantFP(0.0, DL, VT));
}
if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
@@ -13121,19 +13629,12 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
}
- // The next optimizations are desirable only if SELECT_CC can be lowered.
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
- // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
- if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
- (!LegalOperations ||
- TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
- SDLoc DL(N);
- SDValue Ops[] =
- { N0.getOperand(0), N0.getOperand(1),
- DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
- N0.getOperand(2) };
- return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
- }
+ // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
+ if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
+ return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
+ DAG.getConstantFP(0.0, DL, VT));
}
if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
@@ -13378,12 +13879,14 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
- if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize))
- return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+ if (SDValue NegN0 =
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
+ return NegN0;
- // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 FIXME: This is
- // duplicated in isNegatibleForFree, but isNegatibleForFree doesn't know it
- // was called from a context with a nsz flag if the input fsub does not.
+ // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
+ // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
+ // know it was called from a context with a nsz flag if the input fsub does
+ // not.
if (N0.getOpcode() == ISD::FSUB &&
(DAG.getTarget().Options.NoSignedZerosFPMath ||
N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
@@ -13539,8 +14042,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
}
if (N1.hasOneUse()) {
+ // rebuildSetCC calls visitXor which may change the Chain when there is a
+ // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
+ HandleSDNode ChainHandle(Chain);
if (SDValue NewN1 = rebuildSetCC(N1))
- return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
+ return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
+ ChainHandle.getValue(), NewN1, N2);
}
return SDValue();
@@ -13592,8 +14099,8 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) {
}
}
- // Transform br(xor(x, y)) -> br(x != y)
- // Transform br(xor(xor(x,y), 1)) -> br (x == y)
+ // Transform (brcond (xor x, y)) -> (brcond (setcc x, y, ne))
+ // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
if (N.getOpcode() == ISD::XOR) {
// Because we may call this on a speculatively constructed
// SimplifiedSetCC Node, we need to simplify this node first.
@@ -13617,16 +14124,17 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) {
if (N.getOpcode() != ISD::XOR)
return N;
- SDNode *TheXor = N.getNode();
-
- SDValue Op0 = TheXor->getOperand(0);
- SDValue Op1 = TheXor->getOperand(1);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
bool Equal = false;
- if (isOneConstant(Op0) && Op0.hasOneUse() &&
- Op0.getOpcode() == ISD::XOR) {
- TheXor = Op0.getNode();
+ // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
+ if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
+ Op0.getValueType() == MVT::i1) {
+ N = Op0;
+ Op0 = N->getOperand(0);
+ Op1 = N->getOperand(1);
Equal = true;
}
@@ -13634,7 +14142,7 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) {
if (LegalTypes)
SetCCVT = getSetCCResultType(SetCCVT);
// Replace the uses of XOR with SETCC
- return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
+ return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
Equal ? ISD::SETEQ : ISD::SETNE);
}
}
@@ -13994,118 +14502,142 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
return true;
}
-/// Try to combine a load/store with a add/sub of the base pointer node into a
-/// post-indexed load/store. The transformation folded the add/subtract into the
-/// new indexed load/store effectively and all of its uses are redirected to the
-/// new load/store.
-bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
- if (Level < AfterLegalizeDAG)
+static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
+ SDValue &BasePtr, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (PtrUse == N ||
+ (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
return false;
- bool IsLoad = true;
- bool IsMasked = false;
- SDValue Ptr;
- if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad, IsMasked,
- Ptr, TLI))
+ if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
return false;
- if (Ptr.getNode()->hasOneUse())
+ // Don't create an indexed load / store with zero offset.
+ if (isNullConstant(Offset))
return false;
- for (SDNode *Op : Ptr.getNode()->uses()) {
- if (Op == N ||
- (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
- continue;
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ return false;
- SDValue BasePtr;
- SDValue Offset;
- ISD::MemIndexedMode AM = ISD::UNINDEXED;
- if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
- // Don't create a indexed load / store with zero offset.
- if (isNullConstant(Offset))
- continue;
+ SmallPtrSet<const SDNode *, 32> Visited;
+ for (SDNode *Use : BasePtr.getNode()->uses()) {
+ if (Use == Ptr.getNode())
+ continue;
- // Try turning it into a post-indexed load / store except when
- // 1) All uses are load / store ops that use it as base ptr (and
- // it may be folded as addressing mmode).
- // 2) Op must be independent of N, i.e. Op is neither a predecessor
- // nor a successor of N. Otherwise, if Op is folded that would
- // create a cycle.
+ // Don't do the combine if there's a later user which could perform the indexing instead.
+ if (isa<MemSDNode>(Use)) {
+ bool IsLoad = true;
+ bool IsMasked = false;
+ SDValue OtherPtr;
+ if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
+ IsMasked, OtherPtr, TLI)) {
+ SmallVector<const SDNode *, 2> Worklist;
+ Worklist.push_back(Use);
+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
+ return false;
+ }
+ }
- if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
- continue;
+ // If all the uses are load / store addresses, then don't do the
+ // transformation.
+ if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
+ for (SDNode *UseUse : Use->uses())
+ if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
+ return false;
+ }
+ }
+ return true;
+}
- // Check for #1.
- bool TryNext = false;
- for (SDNode *Use : BasePtr.getNode()->uses()) {
- if (Use == Ptr.getNode())
- continue;
+static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
+ bool &IsMasked, SDValue &Ptr,
+ SDValue &BasePtr, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
+ IsMasked, Ptr, TLI) ||
+ Ptr.getNode()->hasOneUse())
+ return nullptr;
+
+ // Try turning it into a post-indexed load / store except when
+ // 1) All uses are load / store ops that use it as base ptr (and
+ // it may be folded as an addressing mode).
+ // 2) Op must be independent of N, i.e. Op is neither a predecessor
+ // nor a successor of N. Otherwise, if Op is folded that would
+ // create a cycle.
+ for (SDNode *Op : Ptr->uses()) {
+ // Check for #1.
+ if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
+ continue;
- // If all the uses are load / store addresses, then don't do the
- // transformation.
- if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
- bool RealUse = false;
- for (SDNode *UseUse : Use->uses()) {
- if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
- RealUse = true;
- }
+ // Check for #2.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 8> Worklist;
+ // Ptr is predecessor to both N and Op.
+ Visited.insert(Ptr.getNode());
+ Worklist.push_back(N);
+ Worklist.push_back(Op);
+ if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
+ !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
+ return Op;
+ }
+ return nullptr;
+}
- if (!RealUse) {
- TryNext = true;
- break;
- }
- }
- }
+/// Try to combine a load/store with a add/sub of the base pointer node into a
+/// post-indexed load/store. The transformation folded the add/subtract into the
+/// new indexed load/store effectively and all of its uses are redirected to the
+/// new load/store.
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
+ if (Level < AfterLegalizeDAG)
+ return false;
- if (TryNext)
- continue;
+ bool IsLoad = true;
+ bool IsMasked = false;
+ SDValue Ptr;
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
+ Offset, AM, DAG, TLI);
+ if (!Op)
+ return false;
- // Check for #2.
- SmallPtrSet<const SDNode *, 32> Visited;
- SmallVector<const SDNode *, 8> Worklist;
- // Ptr is predecessor to both N and Op.
- Visited.insert(Ptr.getNode());
- Worklist.push_back(N);
- Worklist.push_back(Op);
- if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
- !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
- SDValue Result;
- if (!IsMasked)
- Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
- Offset, AM)
- : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
+ SDValue Result;
+ if (!IsMasked)
+ Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
+ Offset, AM)
+ : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
+ BasePtr, Offset, AM);
+ else
+ Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
+ BasePtr, Offset, AM)
+ : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
BasePtr, Offset, AM);
- else
- Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
- BasePtr, Offset, AM)
- : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
- BasePtr, Offset, AM);
- ++PostIndexedNodes;
- ++NodesCombined;
- LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
- dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
- dbgs() << '\n');
- WorklistRemover DeadNodes(*this);
- if (IsLoad) {
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
- } else {
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
- }
-
- // Finally, since the node is now dead, remove it from the graph.
- deleteAndRecombine(N);
-
- // Replace the uses of Use with uses of the updated base value.
- DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
- Result.getValue(IsLoad ? 1 : 0));
- deleteAndRecombine(Op);
- return true;
- }
- }
+ ++PostIndexedNodes;
+ ++NodesCombined;
+ LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
+ dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorklistRemover DeadNodes(*this);
+ if (IsLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
}
- return false;
+ // Finally, since the node is now dead, remove it from the graph.
+ deleteAndRecombine(N);
+
+ // Replace the uses of Op with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
+ Result.getValue(IsLoad ? 1 : 0));
+ deleteAndRecombine(Op);
+ return true;
}
/// Return the base-pointer arithmetic from an indexed \p LD.
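For reference, a minimal source-level sketch of the pattern this post-indexed combine targets (hypothetical illustration, not part of the patch). On targets with post-increment addressing, the separate pointer update below can be folded into the memory access itself:

    // Hypothetical illustration only. The ISD::ADD that increments p is the
    // candidate Op returned by getPostIndexedLoadStoreOp; it gets folded into
    // a post-incremented (indexed) load on targets that support it.
    #include <cstdint>

    uint32_t sum_words(const uint32_t *p, unsigned n) {
      uint32_t s = 0;
      for (unsigned i = 0; i < n; ++i) {
        s += *p; // load with p as the base pointer
        ++p;     // add of the same base pointer, redirected to the new load
      }
      return s;
    }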
@@ -14222,11 +14754,11 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
if (LD->isIndexed()) {
- bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
- LD->getAddressingMode() == ISD::POST_DEC);
- unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
- SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
- LD->getOperand(1), LD->getOperand(2));
+ // Cannot handle opaque target constants and we must respect the user's
+ // request not to split indexes from loads.
+ if (!canSplitIdx(LD))
+ return SDValue();
+ SDValue Idx = SplitIndexingFromLoad(LD);
SDValue Ops[] = {Val, Idx, Chain};
return CombineTo(LD, Ops, 3);
}
@@ -14322,14 +14854,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// the indexing into an add/sub directly (that TargetConstant may not be
// valid for a different type of node, and we cannot convert an opaque
// target constant into a regular constant).
- bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
- cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
+ bool CanSplitIdx = canSplitIdx(LD);
- if (!N->hasAnyUseOfValue(0) &&
- ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
+ if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
SDValue Undef = DAG.getUNDEF(N->getValueType(0));
SDValue Index;
- if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
+ if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
Index = SplitIndexingFromLoad(LD);
// Try to fold the base pointer arithmetic into subsequent loads and
// stores.
@@ -14356,11 +14886,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// Try to infer better alignment information than the load already has.
if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
- if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
- if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
+ if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
+ if (*Alignment > LD->getAlign() &&
+ isAligned(*Alignment, LD->getSrcValueOffset())) {
SDValue NewLoad = DAG.getExtLoad(
LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
- LD->getPointerInfo(), LD->getMemoryVT(), Align,
+ LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
LD->getMemOperand()->getFlags(), LD->getAAInfo());
// NewLoad will always be N as we are only refining the alignment
assert(NewLoad.getNode() == N);
@@ -14557,11 +15088,11 @@ struct LoadedSlice {
}
/// Get the alignment of the load used for this slice.
- unsigned getAlignment() const {
- unsigned Alignment = Origin->getAlignment();
+ Align getAlign() const {
+ Align Alignment = Origin->getAlign();
uint64_t Offset = getOffsetFromBase();
if (Offset != 0)
- Alignment = MinAlign(Alignment, Alignment + Offset);
+ Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
return Alignment;
}
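The switch from MinAlign to commonAlignment above changes only the types involved; the value computed for a slice is unchanged. A standalone sketch of what commonAlignment(Alignment, Alignment.value() + Offset) evaluates to (an illustrative model, not the LLVM implementation):

    // Illustrative model: the slice alignment is the lowest set bit of
    // (OriginAlign | (OriginAlign + Offset)), i.e. the largest power of two
    // guaranteed to divide the sliced address.
    #include <cstdint>

    static uint64_t sliceAlign(uint64_t OriginAlign, uint64_t Offset) {
      uint64_t A = OriginAlign | (OriginAlign + Offset);
      return A & (~A + 1); // lowest set bit
    }
    // sliceAlign(8, 4) == 4, sliceAlign(16, 8) == 8, sliceAlign(8, 16) == 8.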
@@ -14657,8 +15188,8 @@ struct LoadedSlice {
// Create the load for the slice.
SDValue LastInst =
DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
- Origin->getPointerInfo().getWithOffset(Offset),
- getAlignment(), Origin->getMemOperand()->getFlags());
+ Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
+ Origin->getMemOperand()->getFlags());
// If the final type is not the same as the loaded type, this means that
// we have to pad with zero. Create a zero extend for that.
EVT FinalType = Inst->getValueType(0);
@@ -14699,10 +15230,10 @@ struct LoadedSlice {
// Check if it will be merged with the load.
// 1. Check the alignment constraint.
- unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
+ Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(
ResVT.getTypeForEVT(*DAG->getContext()));
- if (RequiredAlignment > getAlignment())
+ if (RequiredAlignment > getAlign())
return false;
// 2. Check that the load is a legal operation for that type.
@@ -14788,14 +15319,14 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
continue;
// Check if the target supplies paired loads for this type.
- unsigned RequiredAlignment = 0;
+ Align RequiredAlignment;
if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
// move to the next pair, this type is hopeless.
Second = nullptr;
continue;
}
// Check if we meet the alignment requirement.
- if (RequiredAlignment > First->getAlignment())
+ if (First->getAlign() < RequiredAlignment)
continue;
// Check that both loads are next to each other in memory.
@@ -14868,6 +15399,12 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
!LD->getValueType(0).isInteger())
return false;
+ // The algorithm to split up a load of a scalable vector into individual
+ // elements currently requires knowing the length of the loaded type,
+ // so will need adjusting to work on scalable vectors.
+ if (LD->getValueType(0).isScalableVector())
+ return false;
+
// Keep track of already used bits to detect overlapping values.
// In that case, we will just abort the transformation.
APInt UsedBits(LD->getValueSizeInBits(0), 0);
@@ -15112,7 +15649,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
// Y is known to provide just those bytes. If so, we try to replace the
// load + replace + store sequence with a single (narrower) store, which makes
// the load dead.
- if (Opc == ISD::OR) {
+ if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
std::pair<unsigned, unsigned> MaskedLoad;
MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
if (MaskedLoad.first)
@@ -15128,6 +15665,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
return NewST;
}
+ if (!EnableReduceLoadOpStoreWidth)
+ return SDValue();
+
if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
Value.getOperand(1).getOpcode() != ISD::Constant)
return SDValue();
@@ -15181,9 +15721,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if (DAG.getDataLayout().isBigEndian())
PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
- unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
+ Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
- if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
+ if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
return SDValue();
SDValue NewPtr = DAG.getMemBasePlusOffset(Ptr, PtrOff, SDLoc(LD));
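A hypothetical source-level example of the load/op/store sequence that ReduceLoadOpStoreWidth narrows when combiner-reduce-load-op-store-width is enabled (sketch only). The OR below changes a single byte of the i32, so the load, the bitwise op, and the store can all be shrunk to byte width at the right offset:

    #include <cstdint>

    // Sketch: *p is loaded, OR'ed with a constant that only touches the top
    // byte, and stored back; the combine rewrites this as a one-byte
    // load/or/store instead of a four-byte one.
    void set_top_bit(uint32_t *p) {
      *p |= 0x80000000u;
    }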
@@ -15229,17 +15769,24 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
ST->getPointerInfo().getAddrSpace() != 0)
return SDValue();
- EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ TypeSize VTSize = VT.getSizeInBits();
+
+ // We don't know the size of scalable types at compile time so we cannot
+ // create an integer of the equivalent size.
+ if (VTSize.isScalable())
+ return SDValue();
+
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
!TLI.isOperationLegal(ISD::STORE, IntVT) ||
!TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
!TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
return SDValue();
- unsigned LDAlign = LD->getAlignment();
- unsigned STAlign = ST->getAlignment();
+ Align LDAlign = LD->getAlign();
+ Align STAlign = ST->getAlign();
Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
- unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
+ Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);
if (LDAlign < ABIAlign || STAlign < ABIAlign)
return SDValue();
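A hypothetical example of the pattern TransformFPLoadStorePair rewrites (sketch only). An FP value that is merely copied through memory can be moved with an integer load/store of the same fixed width, which is also why scalable types are rejected above: their size is unknown at compile time.

    // Sketch: the f64 load/store pair can become an i64 load/store when the
    // target reports the integer form as legal and desirable.
    void copy_double(double *dst, const double *src) {
      *dst = *src;
    }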
@@ -15356,7 +15903,7 @@ SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
return DAG.getTokenFactor(StoreDL, Chains);
}
-bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
+bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
bool IsConstantSrc, bool UseVector, bool UseTrunc) {
// Make sure we have something to merge.
@@ -15530,14 +16077,12 @@ void DAGCombiner::getStoreMergeCandidates(
if (BasePtr.getBase().isUndef())
return;
- bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
- bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
- Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
- bool IsLoadSrc = isa<LoadSDNode>(Val);
+ StoreSource StoreSrc = getStoreSource(Val);
+ assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
BaseIndexOffset LBasePtr;
// Match on loadbaseptr if relevant.
EVT LoadVT;
- if (IsLoadSrc) {
+ if (StoreSrc == StoreSource::Load) {
auto *Ld = cast<LoadSDNode>(Val);
LBasePtr = BaseIndexOffset::match(Ld, DAG);
LoadVT = Ld->getMemoryVT();
@@ -15565,7 +16110,7 @@ void DAGCombiner::getStoreMergeCandidates(
// Allow merging constants of different types as integers.
bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
: Other->getMemoryVT() != MemVT;
- if (IsLoadSrc) {
+ if (StoreSrc == StoreSource::Load) {
if (NoTypeMatch)
return false;
// The Load's Base Ptr must also match
@@ -15589,13 +16134,13 @@ void DAGCombiner::getStoreMergeCandidates(
} else
return false;
}
- if (IsConstantSrc) {
+ if (StoreSrc == StoreSource::Constant) {
if (NoTypeMatch)
return false;
if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
return false;
}
- if (IsExtractVecSrc) {
+ if (StoreSrc == StoreSource::Extract) {
// Do not merge truncated stores here.
if (Other->isTruncatingStore())
return false;
@@ -15736,77 +16281,22 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
return true;
}
-bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
- if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
- return false;
-
- EVT MemVT = St->getMemoryVT();
- int64_t ElementSizeBytes = MemVT.getStoreSize();
- unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
-
- if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
- return false;
-
- bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
- Attribute::NoImplicitFloat);
-
- // This function cannot currently deal with non-byte-sized memory sizes.
- if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
- return false;
-
- if (!MemVT.isSimple())
- return false;
-
- // Perform an early exit check. Do not bother looking at stored values that
- // are not constants, loads, or extracted vector elements.
- SDValue StoredVal = peekThroughBitcasts(St->getValue());
- bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
- bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
- isa<ConstantFPSDNode>(StoredVal);
- bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
- StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
- bool IsNonTemporalStore = St->isNonTemporal();
- bool IsNonTemporalLoad =
- IsLoadSrc && cast<LoadSDNode>(StoredVal)->isNonTemporal();
-
- if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
- return false;
-
- SmallVector<MemOpLink, 8> StoreNodes;
- SDNode *RootNode;
- // Find potential store merge candidates by searching through chain sub-DAG
- getStoreMergeCandidates(St, StoreNodes, RootNode);
-
- // Check if there is anything to merge.
- if (StoreNodes.size() < 2)
- return false;
-
- // Sort the memory operands according to their distance from the
- // base pointer.
- llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
- return LHS.OffsetFromBase < RHS.OffsetFromBase;
- });
-
- // Store Merge attempts to merge the lowest stores. This generally
- // works out as if successful, as the remaining stores are checked
- // after the first collection of stores is merged. However, in the
- // case that a non-mergeable store is found first, e.g., {p[-2],
- // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
- // mergeable cases. To prevent this, we prune such stores from the
- // front of StoreNodes here.
-
- bool RV = false;
- while (StoreNodes.size() > 1) {
+unsigned
+DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
+ int64_t ElementSizeBytes) const {
+ while (true) {
+ // Find a store past the width of the first store.
size_t StartIdx = 0;
while ((StartIdx + 1 < StoreNodes.size()) &&
StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
- StoreNodes[StartIdx + 1].OffsetFromBase)
+ StoreNodes[StartIdx + 1].OffsetFromBase)
++StartIdx;
// Bail if we don't have enough candidates to merge.
if (StartIdx + 1 >= StoreNodes.size())
- return RV;
+ return 0;
+ // Trim stores that overlapped with the first store.
if (StartIdx)
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
@@ -15822,302 +16312,345 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
break;
NumConsecutiveStores = i + 1;
}
+ if (NumConsecutiveStores > 1)
+ return NumConsecutiveStores;
- if (NumConsecutiveStores < 2) {
- StoreNodes.erase(StoreNodes.begin(),
- StoreNodes.begin() + NumConsecutiveStores);
- continue;
- }
-
- // The node with the lowest store address.
- LLVMContext &Context = *DAG.getContext();
- const DataLayout &DL = DAG.getDataLayout();
-
- // Store the constants into memory as one consecutive store.
- if (IsConstantSrc) {
- while (NumConsecutiveStores >= 2) {
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
- unsigned LastLegalType = 1;
- unsigned LastLegalVectorType = 1;
- bool LastIntegerTrunc = false;
- bool NonZero = false;
- unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
- for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
- StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue StoredVal = ST->getValue();
- bool IsElementZero = false;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
- IsElementZero = C->isNullValue();
- else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
- IsElementZero = C->getConstantFPValue()->isNullValue();
- if (IsElementZero) {
- if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
- FirstZeroAfterNonZero = i;
- }
- NonZero |= !IsElementZero;
-
- // Find a legal type for the constant store.
- unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
- EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
- bool IsFast = false;
+ // There are no consecutive stores at the start of the list.
+ // Remove the first store and try again.
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
+ }
+}
- // Break early when size is too large to be legal.
- if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
- break;
+bool DAGCombiner::tryStoreMergeOfConstants(
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
+ EVT MemVT, SDNode *RootNode, bool AllowVectors) {
+ LLVMContext &Context = *DAG.getContext();
+ const DataLayout &DL = DAG.getDataLayout();
+ int64_t ElementSizeBytes = MemVT.getStoreSize();
+ unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+ bool MadeChange = false;
+
+ // Store the constants into memory as one consecutive store.
+ while (NumConsecutiveStores >= 2) {
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ unsigned LastLegalType = 1;
+ unsigned LastLegalVectorType = 1;
+ bool LastIntegerTrunc = false;
+ bool NonZero = false;
+ unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue StoredVal = ST->getValue();
+ bool IsElementZero = false;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
+ IsElementZero = C->isNullValue();
+ else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
+ IsElementZero = C->getConstantFPValue()->isNullValue();
+ if (IsElementZero) {
+ if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
+ FirstZeroAfterNonZero = i;
+ }
+ NonZero |= !IsElementZero;
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy,
- *FirstInChain->getMemOperand(), &IsFast) &&
- IsFast) {
- LastIntegerTrunc = false;
- LastLegalType = i + 1;
- // Or check whether a truncstore is legal.
- } else if (TLI.getTypeAction(Context, StoreTy) ==
- TargetLowering::TypePromoteInteger) {
- EVT LegalizedStoredValTy =
- TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
- if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy,
- *FirstInChain->getMemOperand(),
- &IsFast) &&
- IsFast) {
- LastIntegerTrunc = true;
- LastLegalType = i + 1;
- }
- }
+ // Find a legal type for the constant store.
+ unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
+ EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ bool IsFast = false;
- // We only use vectors if the constant is known to be zero or the
- // target allows it and the function is not marked with the
- // noimplicitfloat attribute.
- if ((!NonZero ||
- TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
- !NoVectors) {
- // Find a legal type for the vector store.
- unsigned Elts = (i + 1) * NumMemElts;
- EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
- if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
- TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
- TLI.allowsMemoryAccess(
- Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) &&
- IsFast)
- LastLegalVectorType = i + 1;
- }
- }
+ // Break early when size is too large to be legal.
+ if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
+ break;
- bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
- unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
-
- // Check if we found a legal integer type that creates a meaningful
- // merge.
- if (NumElem < 2) {
- // We know that candidate stores are in order and of correct
- // shape. While there is no mergeable sequence from the
- // beginning one may start later in the sequence. The only
- // reason a merge of size N could have failed where another of
- // the same size would not have, is if the alignment has
- // improved or we've dropped a non-zero value. Drop as many
- // candidates as we can here.
- unsigned NumSkip = 1;
- while (
- (NumSkip < NumConsecutiveStores) &&
- (NumSkip < FirstZeroAfterNonZero) &&
- (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
- NumSkip++;
-
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
- NumConsecutiveStores -= NumSkip;
- continue;
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFast) &&
+ IsFast) {
+ LastIntegerTrunc = false;
+ LastLegalType = i + 1;
+ // Or check whether a truncstore is legal.
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValTy =
+ TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
+ if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFast) &&
+ IsFast) {
+ LastIntegerTrunc = true;
+ LastLegalType = i + 1;
}
+ }
- // Check that we can merge these candidates without causing a cycle.
- if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
- RootNode)) {
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
- NumConsecutiveStores -= NumElem;
- continue;
- }
+ // We only use vectors if the constant is known to be zero or the
+ // target allows it and the function is not marked with the
+ // noimplicitfloat attribute.
+ if ((!NonZero ||
+ TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
+ AllowVectors) {
+ // Find a legal type for the vector store.
+ unsigned Elts = (i + 1) * NumMemElts;
+ EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+ if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty,
+ *FirstInChain->getMemOperand(), &IsFast) &&
+ IsFast)
+ LastLegalVectorType = i + 1;
+ }
+ }
- RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
- UseVector, LastIntegerTrunc);
+ bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
+ unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
+
+ // Check if we found a legal integer type that creates a meaningful
+ // merge.
+ if (NumElem < 2) {
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have, is if the alignment has
+ // improved or we've dropped a non-zero value. Drop as many
+ // candidates as we can here.
+ unsigned NumSkip = 1;
+ while ((NumSkip < NumConsecutiveStores) &&
+ (NumSkip < FirstZeroAfterNonZero) &&
+ (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ NumSkip++;
+
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+ NumConsecutiveStores -= NumSkip;
+ continue;
+ }
- // Remove merged stores for next iteration.
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
- NumConsecutiveStores -= NumElem;
- }
+ // Check that we can merge these candidates without causing a cycle.
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
+ RootNode)) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
continue;
}
- // When extracting multiple vector elements, try to store them
- // in one vector store rather than a sequence of scalar stores.
- if (IsExtractVecSrc) {
- // Loop on Consecutive Stores on success.
- while (NumConsecutiveStores >= 2) {
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
- unsigned NumStoresToMerge = 1;
- for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
- // Find a legal type for the vector store.
- unsigned Elts = (i + 1) * NumMemElts;
- EVT Ty =
- EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
- bool IsFast;
-
- // Break early when size is too large to be legal.
- if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
- break;
+ MadeChange |= mergeStoresOfConstantsOrVecElts(
+ StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
- if (TLI.isTypeLegal(Ty) &&
- TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, Ty,
- *FirstInChain->getMemOperand(), &IsFast) &&
- IsFast)
- NumStoresToMerge = i + 1;
- }
+ // Remove merged stores for next iteration.
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ }
+ return MadeChange;
+}
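A hypothetical source-level example of the case tryStoreMergeOfConstants handles (sketch only): adjacent constant stores that can be emitted as a single wider constant store once a legal, fast integer or vector type of the combined width is found.

    #include <cstdint>

    // Sketch: four consecutive i8 constant stores, mergeable into one i32
    // store of the combined constant (byte order depends on endianness).
    void write_magic(uint8_t *p) {
      p[0] = 0x7f;
      p[1] = 'E';
      p[2] = 'L';
      p[3] = 'F';
    }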
- // Check if we found a legal integer type creating a meaningful
- // merge.
- if (NumStoresToMerge < 2) {
- // We know that candidate stores are in order and of correct
- // shape. While there is no mergeable sequence from the
- // beginning one may start later in the sequence. The only
- // reason a merge of size N could have failed where another of
- // the same size would not have, is if the alignment has
- // improved. Drop as many candidates as we can here.
- unsigned NumSkip = 1;
- while (
- (NumSkip < NumConsecutiveStores) &&
- (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
- NumSkip++;
-
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
- NumConsecutiveStores -= NumSkip;
- continue;
- }
+bool DAGCombiner::tryStoreMergeOfExtracts(
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
+ EVT MemVT, SDNode *RootNode) {
+ LLVMContext &Context = *DAG.getContext();
+ const DataLayout &DL = DAG.getDataLayout();
+ unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+ bool MadeChange = false;
+
+ // Loop on Consecutive Stores on success.
+ while (NumConsecutiveStores >= 2) {
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ unsigned NumStoresToMerge = 1;
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ // Find a legal type for the vector store.
+ unsigned Elts = (i + 1) * NumMemElts;
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
+ bool IsFast = false;
- // Check that we can merge these candidates without causing a cycle.
- if (!checkMergeStoreCandidatesForDependencies(
- StoreNodes, NumStoresToMerge, RootNode)) {
- StoreNodes.erase(StoreNodes.begin(),
- StoreNodes.begin() + NumStoresToMerge);
- NumConsecutiveStores -= NumStoresToMerge;
- continue;
- }
+ // Break early when size is too large to be legal.
+ if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
+ break;
- RV |= MergeStoresOfConstantsOrVecElts(
- StoreNodes, MemVT, NumStoresToMerge, false, true, false);
+ if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty,
+ *FirstInChain->getMemOperand(), &IsFast) &&
+ IsFast)
+ NumStoresToMerge = i + 1;
+ }
+
+ // Check if we found a legal integer type creating a meaningful
+ // merge.
+ if (NumStoresToMerge < 2) {
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have, is if the alignment has
+ // improved. Drop as many candidates as we can here.
+ unsigned NumSkip = 1;
+ while ((NumSkip < NumConsecutiveStores) &&
+ (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ NumSkip++;
+
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+ NumConsecutiveStores -= NumSkip;
+ continue;
+ }
- StoreNodes.erase(StoreNodes.begin(),
- StoreNodes.begin() + NumStoresToMerge);
- NumConsecutiveStores -= NumStoresToMerge;
- }
+ // Check that we can merge these candidates without causing a cycle.
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
+ RootNode)) {
+ StoreNodes.erase(StoreNodes.begin(),
+ StoreNodes.begin() + NumStoresToMerge);
+ NumConsecutiveStores -= NumStoresToMerge;
continue;
}
- // Below we handle the case of multiple consecutive stores that
- // come from multiple consecutive loads. We merge them into a single
- // wide load and a single wide store.
+ MadeChange |= mergeStoresOfConstantsOrVecElts(
+ StoreNodes, MemVT, NumStoresToMerge, false, true, false);
+
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
+ NumConsecutiveStores -= NumStoresToMerge;
+ }
+ return MadeChange;
+}
+
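A hypothetical example of the extract-element case handled by tryStoreMergeOfExtracts above (sketch only, written with the Clang/GCC vector extension): per-lane stores of a vector value that can be merged back into a single vector store when a legal vector type of the combined width exists.

    // Sketch: each assignment is an extract_vector_elt feeding a scalar store;
    // the four stores can be merged into one <4 x float> store.
    typedef float float4 __attribute__((vector_size(16)));

    void store_lanes(float *p, float4 v) {
      p[0] = v[0];
      p[1] = v[1];
      p[2] = v[2];
      p[3] = v[3];
    }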
+bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumConsecutiveStores, EVT MemVT,
+ SDNode *RootNode, bool AllowVectors,
+ bool IsNonTemporalStore,
+ bool IsNonTemporalLoad) {
+ LLVMContext &Context = *DAG.getContext();
+ const DataLayout &DL = DAG.getDataLayout();
+ int64_t ElementSizeBytes = MemVT.getStoreSize();
+ unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+ bool MadeChange = false;
- // Look for load nodes which are used by the stored values.
- SmallVector<MemOpLink, 8> LoadNodes;
+ int64_t StartAddress = StoreNodes[0].OffsetFromBase;
- // Find acceptable loads. Loads need to have the same chain (token factor),
- // must not be zext, volatile, indexed, and they must be consecutive.
- BaseIndexOffset LdBasePtr;
+ // Look for load nodes which are used by the stored values.
+ SmallVector<MemOpLink, 8> LoadNodes;
- for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue Val = peekThroughBitcasts(St->getValue());
- LoadSDNode *Ld = cast<LoadSDNode>(Val);
-
- BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
- // If this is not the first ptr that we check.
- int64_t LdOffset = 0;
- if (LdBasePtr.getBase().getNode()) {
- // The base ptr must be the same.
- if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
- break;
- } else {
- // Check that all other base pointers are the same as this one.
- LdBasePtr = LdPtr;
- }
+ // Find acceptable loads. Loads need to have the same chain (token factor),
+ // must not be zext, volatile, indexed, and they must be consecutive.
+ BaseIndexOffset LdBasePtr;
+
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue Val = peekThroughBitcasts(St->getValue());
+ LoadSDNode *Ld = cast<LoadSDNode>(Val);
- // We found a potential memory operand to merge.
- LoadNodes.push_back(MemOpLink(Ld, LdOffset));
+ BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
+ // If this is not the first ptr that we check.
+ int64_t LdOffset = 0;
+ if (LdBasePtr.getBase().getNode()) {
+ // The base ptr must be the same.
+ if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
+ break;
+ } else {
+ // Check that all other base pointers are the same as this one.
+ LdBasePtr = LdPtr;
}
- while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
+ // We found a potential memory operand to merge.
+ LoadNodes.push_back(MemOpLink(Ld, LdOffset));
+ }
+
+ while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
+ Align RequiredAlignment;
+ bool NeedRotate = false;
+ if (LoadNodes.size() == 2) {
// If we have load/store pair instructions and we only have two values,
// don't bother merging.
- unsigned RequiredAlignment;
- if (LoadNodes.size() == 2 &&
- TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
- StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
+ if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
+ StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
break;
}
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
- LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
- unsigned FirstLoadAlign = FirstLoad->getAlignment();
-
- // Scan the memory operations on the chain and find the first
- // non-consecutive load memory address. These variables hold the index in
- // the store node array.
-
- unsigned LastConsecutiveLoad = 1;
-
- // This variable refers to the size and not index in the array.
- unsigned LastLegalVectorType = 1;
- unsigned LastLegalIntegerType = 1;
- bool isDereferenceable = true;
- bool DoIntegerTruncate = false;
- StartAddress = LoadNodes[0].OffsetFromBase;
- SDValue FirstChain = FirstLoad->getChain();
- for (unsigned i = 1; i < LoadNodes.size(); ++i) {
- // All loads must share the same chain.
- if (LoadNodes[i].MemNode->getChain() != FirstChain)
- break;
+ // If the loads are reversed, see if we can rotate the halves into place.
+ int64_t Offset0 = LoadNodes[0].OffsetFromBase;
+ int64_t Offset1 = LoadNodes[1].OffsetFromBase;
+ EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
+ if (Offset0 - Offset1 == ElementSizeBytes &&
+ (hasOperation(ISD::ROTL, PairVT) ||
+ hasOperation(ISD::ROTR, PairVT))) {
+ std::swap(LoadNodes[0], LoadNodes[1]);
+ NeedRotate = true;
+ }
+ }
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
- int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
- if (CurrAddress - StartAddress != (ElementSizeBytes * i))
- break;
- LastConsecutiveLoad = i;
+ // Scan the memory operations on the chain and find the first
+ // non-consecutive load memory address. These variables hold the index in
+ // the store node array.
+
+ unsigned LastConsecutiveLoad = 1;
+
+ // This variable refers to the size and not index in the array.
+ unsigned LastLegalVectorType = 1;
+ unsigned LastLegalIntegerType = 1;
+ bool isDereferenceable = true;
+ bool DoIntegerTruncate = false;
+ StartAddress = LoadNodes[0].OffsetFromBase;
+ SDValue LoadChain = FirstLoad->getChain();
+ for (unsigned i = 1; i < LoadNodes.size(); ++i) {
+ // All loads must share the same chain.
+ if (LoadNodes[i].MemNode->getChain() != LoadChain)
+ break;
- if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
- isDereferenceable = false;
+ int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+ LastConsecutiveLoad = i;
- // Find a legal type for the vector store.
- unsigned Elts = (i + 1) * NumMemElts;
- EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+ if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
+ isDereferenceable = false;
- // Break early when size is too large to be legal.
- if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
- break;
+ // Find a legal type for the vector store.
+ unsigned Elts = (i + 1) * NumMemElts;
+ EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
- bool IsFastSt, IsFastLd;
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy,
- *FirstInChain->getMemOperand(), &IsFastSt) &&
- IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy,
- *FirstLoad->getMemOperand(), &IsFastLd) &&
- IsFastLd) {
- LastLegalVectorType = i + 1;
- }
+ // Break early when size is too large to be legal.
+ if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
+ break;
- // Find a legal type for the integer store.
- unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
- StoreTy = EVT::getIntegerVT(Context, SizeInBits);
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ bool IsFastSt = false;
+ bool IsFastLd = false;
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstLoad->getMemOperand(), &IsFastLd) &&
+ IsFastLd) {
+ LastLegalVectorType = i + 1;
+ }
+
+ // Find a legal type for the integer store.
+ unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
+ StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstLoad->getMemOperand(), &IsFastLd) &&
+ IsFastLd) {
+ LastLegalIntegerType = i + 1;
+ DoIntegerTruncate = false;
+ // Or check whether a truncstore and extload is legal.
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
+ if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
@@ -16125,149 +16658,225 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
*FirstLoad->getMemOperand(), &IsFastLd) &&
IsFastLd) {
LastLegalIntegerType = i + 1;
- DoIntegerTruncate = false;
- // Or check whether a truncstore and extload is legal.
- } else if (TLI.getTypeAction(Context, StoreTy) ==
- TargetLowering::TypePromoteInteger) {
- EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
- if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
- StoreTy) &&
- TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
- StoreTy) &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy,
- *FirstInChain->getMemOperand(),
- &IsFastSt) &&
- IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy,
- *FirstLoad->getMemOperand(), &IsFastLd) &&
- IsFastLd) {
- LastLegalIntegerType = i + 1;
- DoIntegerTruncate = true;
- }
+ DoIntegerTruncate = true;
}
}
+ }
- // Only use vector types if the vector type is larger than the integer
- // type. If they are the same, use integers.
- bool UseVectorTy =
- LastLegalVectorType > LastLegalIntegerType && !NoVectors;
- unsigned LastLegalType =
- std::max(LastLegalVectorType, LastLegalIntegerType);
-
- // We add +1 here because the LastXXX variables refer to location while
- // the NumElem refers to array/index size.
- unsigned NumElem =
- std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
- NumElem = std::min(LastLegalType, NumElem);
-
- if (NumElem < 2) {
- // We know that candidate stores are in order and of correct
- // shape. While there is no mergeable sequence from the
- // beginning one may start later in the sequence. The only
- // reason a merge of size N could have failed where another of
- // the same size would not have is if the alignment or either
- // the load or store has improved. Drop as many candidates as we
- // can here.
- unsigned NumSkip = 1;
- while ((NumSkip < LoadNodes.size()) &&
- (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
- (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
- NumSkip++;
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
- LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
- NumConsecutiveStores -= NumSkip;
- continue;
- }
+ // Only use vector types if the vector type is larger than the integer
+ // type. If they are the same, use integers.
+ bool UseVectorTy =
+ LastLegalVectorType > LastLegalIntegerType && AllowVectors;
+ unsigned LastLegalType =
+ std::max(LastLegalVectorType, LastLegalIntegerType);
+
+ // We add +1 here because the LastXXX variables refer to location while
+ // the NumElem refers to array/index size.
+ unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
+ NumElem = std::min(LastLegalType, NumElem);
+ unsigned FirstLoadAlign = FirstLoad->getAlignment();
+
+ if (NumElem < 2) {
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have is if the alignment or either
+ // the load or store has improved. Drop as many candidates as we
+ // can here.
+ unsigned NumSkip = 1;
+ while ((NumSkip < LoadNodes.size()) &&
+ (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
+ (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ NumSkip++;
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
+ NumConsecutiveStores -= NumSkip;
+ continue;
+ }
- // Check that we can merge these candidates without causing a cycle.
- if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
- RootNode)) {
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
- LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
- NumConsecutiveStores -= NumElem;
- continue;
- }
+ // Check that we can merge these candidates without causing a cycle.
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
+ RootNode)) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ continue;
+ }
- // Find if it is better to use vectors or integers to load and store
- // to memory.
- EVT JointMemOpVT;
- if (UseVectorTy) {
- // Find a legal type for the vector store.
- unsigned Elts = NumElem * NumMemElts;
- JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
- } else {
- unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
- JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
+ // Find if it is better to use vectors or integers to load and store
+ // to memory.
+ EVT JointMemOpVT;
+ if (UseVectorTy) {
+ // Find a legal type for the vector store.
+ unsigned Elts = NumElem * NumMemElts;
+ JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+ } else {
+ unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+ JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
+ }
+
+ SDLoc LoadDL(LoadNodes[0].MemNode);
+ SDLoc StoreDL(StoreNodes[0].MemNode);
+
+ // The merged loads are required to have the same incoming chain, so
+ // using the first's chain is acceptable.
+
+ SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
+ AddToWorklist(NewStoreChain.getNode());
+
+ MachineMemOperand::Flags LdMMOFlags =
+ isDereferenceable ? MachineMemOperand::MODereferenceable
+ : MachineMemOperand::MONone;
+ if (IsNonTemporalLoad)
+ LdMMOFlags |= MachineMemOperand::MONonTemporal;
+
+ MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
+ ? MachineMemOperand::MONonTemporal
+ : MachineMemOperand::MONone;
+
+ SDValue NewLoad, NewStore;
+ if (UseVectorTy || !DoIntegerTruncate) {
+ NewLoad = DAG.getLoad(
+ JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
+ SDValue StoreOp = NewLoad;
+ if (NeedRotate) {
+ unsigned LoadWidth = ElementSizeBytes * 8 * 2;
+ assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
+ "Unexpected type for rotate-able load pair");
+ SDValue RotAmt =
+ DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
+ // Target can convert to the identical ROTR if it does not have ROTL.
+ StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
}
+ NewStore = DAG.getStore(
+ NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
+ } else { // This must be the truncstore/extload case
+ EVT ExtendedTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
+ NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
+ FirstLoad->getChain(), FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), JointMemOpVT,
+ FirstLoadAlign, LdMMOFlags);
+ NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), JointMemOpVT,
+ FirstInChain->getAlignment(),
+ FirstInChain->getMemOperand()->getFlags());
+ }
+
+ // Transfer chain users from old loads to the new load.
+ for (unsigned i = 0; i < NumElem; ++i) {
+ LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+ SDValue(NewLoad.getNode(), 1));
+ }
+
+ // Replace all stores with the new store. Recursively remove corresponding
+ // values if they are no longer used.
+ for (unsigned i = 0; i < NumElem; ++i) {
+ SDValue Val = StoreNodes[i].MemNode->getOperand(1);
+ CombineTo(StoreNodes[i].MemNode, NewStore);
+ if (Val.getNode()->use_empty())
+ recursivelyDeleteUnusedNodes(Val.getNode());
+ }
+
+ MadeChange = true;
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
+ NumConsecutiveStores -= NumElem;
+ }
+ return MadeChange;
+}
+
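A hypothetical example of the reversed-load pair handled in tryStoreMergeOfLoads above (sketch only). When the two loads feed stores in the opposite order, the pair can still be merged: one double-width load, a rotate by half the width to swap the halves, and one double-width store, provided ROTL or ROTR is legal for that type.

    #include <cstdint>

    // Sketch: the 32-bit loads appear in the reverse order of the stores; an
    // i64 load, ROTL by 32, and i64 store can replace all four operations.
    void swap_copy(uint32_t *dst, const uint32_t *src) {
      dst[0] = src[1];
      dst[1] = src[0];
    }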
+bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
+ if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
+ return false;
- SDLoc LoadDL(LoadNodes[0].MemNode);
- SDLoc StoreDL(StoreNodes[0].MemNode);
-
- // The merged loads are required to have the same incoming chain, so
- // using the first's chain is acceptable.
-
- SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
- AddToWorklist(NewStoreChain.getNode());
-
- MachineMemOperand::Flags LdMMOFlags =
- isDereferenceable ? MachineMemOperand::MODereferenceable
- : MachineMemOperand::MONone;
- if (IsNonTemporalLoad)
- LdMMOFlags |= MachineMemOperand::MONonTemporal;
-
- MachineMemOperand::Flags StMMOFlags =
- IsNonTemporalStore ? MachineMemOperand::MONonTemporal
- : MachineMemOperand::MONone;
-
- SDValue NewLoad, NewStore;
- if (UseVectorTy || !DoIntegerTruncate) {
- NewLoad =
- DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
- FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
- FirstLoadAlign, LdMMOFlags);
- NewStore = DAG.getStore(
- NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
- } else { // This must be the truncstore/extload case
- EVT ExtendedTy =
- TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
- NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
- FirstLoad->getChain(), FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(), JointMemOpVT,
- FirstLoadAlign, LdMMOFlags);
- NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
- FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(),
- JointMemOpVT, FirstInChain->getAlignment(),
- FirstInChain->getMemOperand()->getFlags());
- }
+ // TODO: Extend this function to merge stores of scalable vectors.
+ // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
+ // store since we know <vscale x 16 x i8> is exactly twice as large as
+ // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
+ EVT MemVT = St->getMemoryVT();
+ if (MemVT.isScalableVector())
+ return false;
+ if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
+ return false;
- // Transfer chain users from old loads to the new load.
- for (unsigned i = 0; i < NumElem; ++i) {
- LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
- SDValue(NewLoad.getNode(), 1));
- }
+ // This function cannot currently deal with non-byte-sized memory sizes.
+ int64_t ElementSizeBytes = MemVT.getStoreSize();
+ if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
+ return false;
- // Replace the all stores with the new store. Recursively remove
- // corresponding value if its no longer used.
- for (unsigned i = 0; i < NumElem; ++i) {
- SDValue Val = StoreNodes[i].MemNode->getOperand(1);
- CombineTo(StoreNodes[i].MemNode, NewStore);
- if (Val.getNode()->use_empty())
- recursivelyDeleteUnusedNodes(Val.getNode());
- }
+ // Do not bother looking at stored values that are not constants, loads, or
+ // extracted vector elements.
+ SDValue StoredVal = peekThroughBitcasts(St->getValue());
+ const StoreSource StoreSrc = getStoreSource(StoredVal);
+ if (StoreSrc == StoreSource::Unknown)
+ return false;
- RV = true;
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
- LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
- NumConsecutiveStores -= NumElem;
+ SmallVector<MemOpLink, 8> StoreNodes;
+ SDNode *RootNode;
+ // Find potential store merge candidates by searching through chain sub-DAG
+ getStoreMergeCandidates(St, StoreNodes, RootNode);
+
+ // Check if there is anything to merge.
+ if (StoreNodes.size() < 2)
+ return false;
+
+ // Sort the memory operands according to their distance from the
+ // base pointer.
+ llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
+ return LHS.OffsetFromBase < RHS.OffsetFromBase;
+ });
+
+ bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
+ Attribute::NoImplicitFloat);
+ bool IsNonTemporalStore = St->isNonTemporal();
+ bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
+ cast<LoadSDNode>(StoredVal)->isNonTemporal();
+
+ // Store Merge attempts to merge the lowest stores first. This generally
+ // works out well: once the first collection of stores is merged, the
+ // remaining stores are checked on the next iteration. However, in the
+ // case that a non-mergeable store is found first, e.g., {p[-2],
+ // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
+ // mergeable cases. To prevent this, we prune such stores from the
+ // front of StoreNodes here.
+ bool MadeChange = false;
+ while (StoreNodes.size() > 1) {
+ unsigned NumConsecutiveStores =
+ getConsecutiveStores(StoreNodes, ElementSizeBytes);
+ // There are no more stores in the list to examine.
+ if (NumConsecutiveStores == 0)
+ return MadeChange;
+
+ // We have at least 2 consecutive stores. Try to merge them.
+ assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
+ switch (StoreSrc) {
+ case StoreSource::Constant:
+ MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
+ MemVT, RootNode, AllowVectors);
+ break;
+
+ case StoreSource::Extract:
+ MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
+ MemVT, RootNode);
+ break;
+
+ case StoreSource::Load:
+ MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
+ MemVT, RootNode, AllowVectors,
+ IsNonTemporalStore, IsNonTemporalLoad);
+ break;
+
+ default:
+ llvm_unreachable("Unhandled store source type");
}
}
- return RV;
+ return MadeChange;
}
SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
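A standalone sketch of the pruning and run counting that getConsecutiveStores performs for the store-merge driver above (an illustrative model over store offsets only, not the DAGCombiner code). Leading stores that no other store follows at a distance of exactly ElementSizeBytes are dropped, then the length of the consecutive run at the front is returned.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Illustrative model: Offsets is already sorted by distance from the base
    // pointer, mirroring the llvm::sort call in mergeConsecutiveStores.
    static unsigned consecutiveRun(std::vector<int64_t> &Offsets,
                                   int64_t ElementSizeBytes) {
      while (Offsets.size() > 1) {
        // Find a store that the next store follows contiguously.
        size_t StartIdx = 0;
        while (StartIdx + 1 < Offsets.size() &&
               Offsets[StartIdx] + ElementSizeBytes != Offsets[StartIdx + 1])
          ++StartIdx;
        if (StartIdx + 1 >= Offsets.size())
          return 0;
        // Prune the non-consecutive stores in front of it.
        Offsets.erase(Offsets.begin(), Offsets.begin() + StartIdx);

        // Count the consecutive run starting at the new front.
        unsigned Run = 1;
        for (size_t i = 1; i < Offsets.size(); ++i) {
          if (Offsets[i] != Offsets[0] + (int64_t)i * ElementSizeBytes)
            break;
          Run = i + 1;
        }
        if (Run > 1)
          return Run;
        Offsets.erase(Offsets.begin());
      }
      return 0;
    }
    // E.g. with ElementSizeBytes == 4, offsets {-8, 0, 4, 8, 12} drop the
    // leading -8 and return 4, matching the {p[-2], p[0..3]} case described
    // in mergeConsecutiveStores.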
@@ -16408,11 +17017,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Try to infer better alignment information than the store already has.
if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
- if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
- if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
+ if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
+ if (*Alignment > ST->getAlign() &&
+ isAligned(*Alignment, ST->getSrcValueOffset())) {
SDValue NewStore =
DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
- ST->getMemoryVT(), Align,
+ ST->getMemoryVT(), *Alignment,
ST->getMemOperand()->getFlags(), ST->getAAInfo());
// NewStore will always be N as we are only refining the alignment
assert(NewStore.getNode() == N);
@@ -16497,7 +17107,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
- !ST1->getBasePtr().isUndef()) {
+ !ST1->getBasePtr().isUndef() &&
+ // BaseIndexOffset and the code below requires knowing the size
+ // of a vector, so bail out if MemoryVT is scalable.
+ !ST1->getMemoryVT().isScalableVector()) {
const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
@@ -16510,33 +17123,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
CombineTo(ST1, ST1->getChain());
return SDValue();
}
-
- // If ST stores to a subset of preceding store's write set, we may be
- // able to fold ST's value into the preceding stored value. As we know
- // the other uses of ST1's chain are unconcerned with ST, this folding
- // will not affect those nodes.
- int64_t BitOffset;
- if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize,
- BitOffset)) {
- SDValue ChainValue = ST1->getValue();
- if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) {
- if (auto *C = dyn_cast<ConstantSDNode>(Value)) {
- APInt Val = C1->getAPIntValue();
- APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize);
- // FIXME: Handle Big-endian mode.
- if (!DAG.getDataLayout().isBigEndian()) {
- Val.insertBits(InsertVal, BitOffset);
- SDValue NewSDVal =
- DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(),
- C1->isTargetOpcode(), C1->isOpaque());
- SDNode *NewST1 = DAG.UpdateNodeOperands(
- ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2),
- ST1->getOperand(3));
- return CombineTo(ST, SDValue(NewST1, 0));
- }
- }
- }
- } // End ST subset of ST1 case.
}
}
}
@@ -16559,7 +17145,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// There can be multiple store sequences on the same chain.
// Keep trying to merge store sequences until we are unable to do so
// or until we merge the last store on the chain.
- bool Changed = MergeConsecutiveStores(ST);
+ bool Changed = mergeConsecutiveStores(ST);
if (!Changed) break;
// Return N as merge only uses CombineTo and no worklist clean
// up is necessary.
@@ -16835,6 +17421,10 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
EVT SubVecVT = SubVec.getValueType();
EVT VT = DestVec.getValueType();
unsigned NumSrcElts = SubVecVT.getVectorNumElements();
+ // If the source only has a single vector element, the cost of creating the
+ // shuffle to add it to a vector is likely to exceed the cost of an
+ // insert_vector_elt.
+ if (NumSrcElts == 1)
+ return SDValue();
unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
unsigned NumMaskVals = ExtendRatio * NumSrcElts;
@@ -16880,12 +17470,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDLoc DL(N);
EVT VT = InVec.getValueType();
- unsigned NumElts = VT.getVectorNumElements();
+ auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
// Insert into out-of-bounds element is undefined.
- if (auto *IndexC = dyn_cast<ConstantSDNode>(EltNo))
- if (IndexC->getZExtValue() >= VT.getVectorNumElements())
- return DAG.getUNDEF(VT);
+ if (IndexC && VT.isFixedLengthVector() &&
+ IndexC->getZExtValue() >= VT.getVectorNumElements())
+ return DAG.getUNDEF(VT);
// Remove redundant insertions:
// (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
@@ -16893,17 +17483,25 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
return InVec;
- auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
if (!IndexC) {
// If this is variable insert to undef vector, it might be better to splat:
// inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
- SmallVector<SDValue, 8> Ops(NumElts, InVal);
- return DAG.getBuildVector(VT, DL, Ops);
+ if (VT.isScalableVector())
+ return DAG.getSplatVector(VT, DL, InVal);
+ else {
+ SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
+ return DAG.getBuildVector(VT, DL, Ops);
+ }
}
return SDValue();
}
+ if (VT.isScalableVector())
+ return SDValue();
+
+ unsigned NumElts = VT.getVectorNumElements();
+
// We must know which element is being inserted for folds below here.
unsigned Elt = IndexC->getZExtValue();
if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
@@ -16968,11 +17566,12 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
EVT ResultVT = EVE->getValueType(0);
EVT VecEltVT = InVecVT.getVectorElementType();
- unsigned Align = OriginalLoad->getAlignment();
- unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
+ Align Alignment = OriginalLoad->getAlign();
+ Align NewAlign = DAG.getDataLayout().getABITypeAlign(
VecEltVT.getTypeForEVT(*DAG.getContext()));
- if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
+ if (NewAlign > Alignment ||
+ !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
return SDValue();
ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
@@ -16980,7 +17579,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
return SDValue();
- Align = NewAlign;
+ Alignment = NewAlign;
SDValue NewPtr = OriginalLoad->getBasePtr();
SDValue Offset;
@@ -17020,13 +17619,13 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
: ISD::EXTLOAD;
Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
- Align, OriginalLoad->getMemOperand()->getFlags(),
+ Alignment, OriginalLoad->getMemOperand()->getFlags(),
OriginalLoad->getAAInfo());
Chain = Load.getValue(1);
} else {
- Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
- MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
- OriginalLoad->getAAInfo());
+ Load = DAG.getLoad(
+ VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
+ OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
Chain = Load.getValue(1);
if (ResultVT.bitsLT(VecEltVT))
Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
@@ -17102,6 +17701,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (scalar_to_vector val, 0) -> val
if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+    // Only the 0'th element of SCALAR_TO_VECTOR is defined.
+ if (DAG.isKnownNeverZero(Index))
+ return DAG.getUNDEF(ScalarVT);
+
// Check if the result type doesn't match the inserted element type. A
// SCALAR_TO_VECTOR may truncate the inserted element and the
// EXTRACT_VECTOR_ELT may widen the extracted vector.
@@ -17115,15 +17718,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// extract_vector_elt of out-of-bounds element -> UNDEF
auto *IndexC = dyn_cast<ConstantSDNode>(Index);
- unsigned NumElts = VecVT.getVectorNumElements();
- if (IndexC && IndexC->getAPIntValue().uge(NumElts))
+ if (IndexC && VecVT.isFixedLengthVector() &&
+ IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
return DAG.getUNDEF(ScalarVT);
// extract_vector_elt (build_vector x, y), 1 -> y
- if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
+ if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
+ VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
TLI.isTypeLegal(VecVT) &&
(VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
- SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
+ assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
+ VecVT.isFixedLengthVector()) &&
+ "BUILD_VECTOR used for scalable vectors");
+ unsigned IndexVal =
+ VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
+ SDValue Elt = VecOp.getOperand(IndexVal);
EVT InEltVT = Elt.getValueType();
// Sometimes build_vector's scalar input types do not match result type.
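A side note on the SPLAT_VECTOR case added above: every lane of a splat carries the same scalar, so the extract index can be ignored and operand 0 read directly. A minimal standalone C++ sketch of that equivalence (hypothetical 4-lane splat, not DAG code) follows.

#include <array>
#include <cassert>

int main() {
  const int Scalar = 42;            // the SPLAT_VECTOR operand
  std::array<int, 4> Splat;
  Splat.fill(Scalar);

  // extract_vector_elt (splat_vector Scalar), Idx == Scalar for every Idx,
  // which is why the combine reads operand 0 regardless of the index.
  for (unsigned Idx = 0; Idx != 4; ++Idx)
    assert(Splat[Idx] == Scalar);
  return 0;
}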
@@ -17134,6 +17743,15 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// converts.
}
+ if (VecVT.isScalableVector())
+ return SDValue();
+
+ // All the code from this point onwards assumes fixed width vectors, but it's
+ // possible that some of the combinations could be made to work for scalable
+ // vectors too.
+ unsigned NumElts = VecVT.getVectorNumElements();
+ unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
+
// TODO: These transforms should not require the 'hasOneUse' restriction, but
// there are regressions on multiple targets without it. We can end up with a
// mess of scalar and vector code if we reduce only part of the DAG to scalar.
@@ -17157,7 +17775,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
"Extract element and scalar to vector can't change element type "
"from FP to integer.");
unsigned XBitWidth = X.getValueSizeInBits();
- unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
// An extract element return value type can be wider than its vector
@@ -17215,9 +17832,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// FIXME: Should really be just isOperationLegalOrCustom.
TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
- EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
- DAG.getConstant(OrigElt, DL, IndexTy));
+ DAG.getVectorIdxConstant(OrigElt, DL));
}
}
@@ -17241,6 +17857,14 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
AddToWorklist(N);
return SDValue(N, 0);
}
+ APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
+ if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
+ // We simplified the vector operand of this extract element. If this
+ // extract is not dead, visit it again so it is folded properly.
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
+ return SDValue(N, 0);
+ }
}
// Everything under here is trying to match an extract of a loaded value.
@@ -17326,6 +17950,30 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
Index = DAG.getConstant(Elt, DL, Index.getValueType());
}
+ } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
+ VecVT.getVectorElementType() == ScalarVT &&
+ (!LegalTypes ||
+ TLI.isTypeLegal(
+ VecOp.getOperand(0).getValueType().getVectorElementType()))) {
+ // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
+ // -> extract_vector_elt a, 0
+ // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
+ // -> extract_vector_elt a, 1
+ // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
+ // -> extract_vector_elt b, 0
+ // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
+ // -> extract_vector_elt b, 1
+ SDLoc SL(N);
+ EVT ConcatVT = VecOp.getOperand(0).getValueType();
+ unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
+ SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
+ Index.getValueType());
+
+ SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
+ ConcatVT.getVectorElementType(),
+ ConcatOp, NewIdx);
+ return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
}
// Make sure we found a non-volatile load and the extractelement is
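The index arithmetic in the new CONCAT_VECTORS case above is easy to check outside the DAG. Below is a minimal standalone sketch (hypothetical two-operand concat of 2-element vectors, not LLVM API code) that maps a flat extract index onto Elt / ConcatNumElts and Elt % ConcatNumElts, matching the v2i16 examples in the comment.

#include <array>
#include <cassert>
#include <cstdio>

// Model a concat of two 2-element vectors as one 4-element vector and show
// how a flat extract index maps to (operand, element-within-operand).
int main() {
  const unsigned ConcatNumElts = 2;                  // elements per concat operand
  std::array<std::array<int, 2>, 2> Ops = {{{10, 11}, {20, 21}}}; // a, b
  for (unsigned Elt = 0; Elt != 4; ++Elt) {
    unsigned OpIdx = Elt / ConcatNumElts;            // which concat operand
    unsigned NewIdx = Elt % ConcatNumElts;           // index into that operand
    int Extracted = Ops[OpIdx][NewIdx];
    std::printf("extract %u -> operand %u, index %u, value %d\n", Elt, OpIdx,
                NewIdx, Extracted);
    assert(Extracted == (OpIdx ? 20 : 10) + (int)NewIdx);
  }
  return 0;
}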
@@ -17407,6 +18055,11 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
if (!ValidTypes)
return SDValue();
+ // If we already have a splat buildvector, then don't fold it if it means
+ // introducing zeros.
+ if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
+ return SDValue();
+
bool isLE = DAG.getDataLayout().isLittleEndian();
unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
assert(ElemRatio > 1 && "Invalid element size ratio");
@@ -17453,12 +18106,89 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
return DAG.getBitcast(VT, BV);
}
+// Simplify (build_vec (trunc $1)
+// (trunc (srl $1 half-width))
+// (trunc (srl $1 (2 * half-width))) …)
+// to (bitcast $1)
+SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
+
+ // Only for little endian
+ if (!DAG.getDataLayout().isLittleEndian())
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ EVT OutScalarTy = VT.getScalarType();
+ uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
+
+ // Only for power of two types to be sure that bitcast works well
+ if (!isPowerOf2_64(ScalarTypeBitsize))
+ return SDValue();
+
+ unsigned NumInScalars = N->getNumOperands();
+
+ // Look through bitcasts
+ auto PeekThroughBitcast = [](SDValue Op) {
+ if (Op.getOpcode() == ISD::BITCAST)
+ return Op.getOperand(0);
+ return Op;
+ };
+
+ // The source value where all the parts are extracted.
+ SDValue Src;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = PeekThroughBitcast(N->getOperand(i));
+ // Ignore undef inputs.
+ if (In.isUndef()) continue;
+
+ if (In.getOpcode() != ISD::TRUNCATE)
+ return SDValue();
+
+ In = PeekThroughBitcast(In.getOperand(0));
+
+ if (In.getOpcode() != ISD::SRL) {
+      // For now only handle build_vec without shuffling; handle shifts here
+      // in the future.
+ if (i != 0)
+ return SDValue();
+
+ Src = In;
+ } else {
+ // In is SRL
+ SDValue part = PeekThroughBitcast(In.getOperand(0));
+
+ if (!Src) {
+ Src = part;
+ } else if (Src != part) {
+ // Vector parts do not stem from the same variable
+ return SDValue();
+ }
+
+ SDValue ShiftAmtVal = In.getOperand(1);
+ if (!isa<ConstantSDNode>(ShiftAmtVal))
+ return SDValue();
+
+ uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
+
+      // The value is not extracted from the position this lane requires.
+ if (ShiftAmt != i * ScalarTypeBitsize)
+ return SDValue();
+ }
+ }
+
+ // Only cast if the size is the same
+ if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
+ return SDValue();
+
+ return DAG.getBitcast(VT, Src);
+}
+
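To make the new fold concrete, here is a small standalone C++ sketch (assuming a little-endian host, matching the combine's little-endian restriction, with a 64-bit source split into four 16-bit lanes; not LLVM API code). It shows why a build_vector of successively shifted truncations is just a reinterpretation of the source's bytes, which is what reduceBuildVecTruncToBitCast folds to a bitcast.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint64_t Src = 0x0004000300020001ULL; // the $1 in the comment above

  // build_vec (trunc Src), (trunc (srl Src, 16)), (trunc (srl Src, 32)), ...
  uint16_t BuildVec[4];
  for (unsigned i = 0; i != 4; ++i)
    BuildVec[i] = static_cast<uint16_t>(Src >> (i * 16));

  // bitcast Src to v4i16: on a little-endian host this is a plain reinterpret.
  uint16_t Bitcast[4];
  std::memcpy(Bitcast, &Src, sizeof(Bitcast));

  for (unsigned i = 0; i != 4; ++i)
    std::printf("lane %u: build_vec=%u bitcast=%u\n", i, BuildVec[i], Bitcast[i]);
  return std::memcmp(BuildVec, Bitcast, sizeof(BuildVec)) != 0; // 0 == equal
}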
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask,
SDValue VecIn1, SDValue VecIn2,
unsigned LeftIdx, bool DidSplitVec) {
- MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
- SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
EVT VT = N->getValueType(0);
EVT InVT1 = VecIn1.getValueType();
@@ -17492,7 +18222,7 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
// If we only have one input vector, and it's twice the size of the
// output, split it in two.
VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
- DAG.getConstant(NumElems, DL, IdxTy));
+ DAG.getVectorIdxConstant(NumElems, DL));
VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
// Since we now have shorter input vectors, adjust the offset of the
// second vector's start.
@@ -17699,6 +18429,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
return SDValue();
SDValue ExtractedFromVec = Op.getOperand(0);
+ if (ExtractedFromVec.getValueType().isScalableVector())
+ return SDValue();
+
const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
return SDValue();
@@ -17733,7 +18466,6 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
unsigned NearestPow2 = 0;
SDValue Vec = VecIn.back();
EVT InVT = Vec.getValueType();
- MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
SmallVector<unsigned, 8> IndexVec(NumElems, 0);
for (unsigned i = 0; i < NumElems; i++) {
@@ -17752,9 +18484,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
InVT.getVectorElementType(), SplitSize);
if (TLI.isTypeLegal(SplitVT)) {
SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
- DAG.getConstant(SplitSize, DL, IdxTy));
+ DAG.getVectorIdxConstant(SplitSize, DL));
SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
- DAG.getConstant(0, DL, IdxTy));
+ DAG.getVectorIdxConstant(0, DL));
VecIn.pop_back();
VecIn.push_back(VecIn1);
VecIn.push_back(VecIn2);
@@ -17986,6 +18718,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
return V;
+ if (SDValue V = reduceBuildVecTruncToBitCast(N))
+ return V;
+
if (SDValue V = reduceBuildVecToShuffle(N))
return V;
@@ -18080,6 +18815,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
// What vector are we extracting the subvector from and at what index?
SDValue ExtVec = Op.getOperand(0);
+ int ExtIdx = Op.getConstantOperandVal(1);
// We want the EVT of the original extraction to correctly scale the
// extraction index.
@@ -18092,10 +18828,6 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
continue;
}
- if (!isa<ConstantSDNode>(Op.getOperand(1)))
- return SDValue();
- int ExtIdx = Op.getConstantOperandVal(1);
-
// Ensure that we are extracting a subvector from a vector the same
// size as the result.
if (ExtVT.getSizeInBits() != VT.getSizeInBits())
@@ -18129,6 +18861,69 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
DAG.getBitcast(VT, SV1), Mask, DAG);
}
+static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
+ unsigned CastOpcode = N->getOperand(0).getOpcode();
+ switch (CastOpcode) {
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ // TODO: Allow more opcodes?
+ // case ISD::BITCAST:
+ // case ISD::TRUNCATE:
+ // case ISD::ZERO_EXTEND:
+ // case ISD::SIGN_EXTEND:
+ // case ISD::FP_EXTEND:
+ break;
+ default:
+ return SDValue();
+ }
+
+ EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
+ if (!SrcVT.isVector())
+ return SDValue();
+
+ // All operands of the concat must be the same kind of cast from the same
+ // source type.
+ SmallVector<SDValue, 4> SrcOps;
+ for (SDValue Op : N->ops()) {
+ if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
+ Op.getOperand(0).getValueType() != SrcVT)
+ return SDValue();
+ SrcOps.push_back(Op.getOperand(0));
+ }
+
+ // The wider cast must be supported by the target. This is unusual because
+ // the operation support type parameter depends on the opcode. In addition,
+ // check the other type in the cast to make sure this is really legal.
+ EVT VT = N->getValueType(0);
+ EVT SrcEltVT = SrcVT.getVectorElementType();
+ unsigned NumElts = SrcVT.getVectorElementCount().Min * N->getNumOperands();
+ EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ switch (CastOpcode) {
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
+ !TLI.isTypeLegal(VT))
+ return SDValue();
+ break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
+ !TLI.isTypeLegal(ConcatSrcVT))
+ return SDValue();
+ break;
+ default:
+ llvm_unreachable("Unexpected cast opcode");
+ }
+
+ // concat (cast X), (cast Y)... -> cast (concat X, Y...)
+ SDLoc DL(N);
+ SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
+ return DAG.getNode(CastOpcode, DL, VT, NewConcat);
+}
+
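A scalar analogue of the new concat-of-casts fold, written as a standalone sketch rather than DAG code (sint_to_fp modelled as int-to-float conversion over hypothetical 2-element operands): converting each half and then concatenating yields the same lanes as concatenating first and converting once, which is the equivalence the combine relies on once the wider cast is known to be legal.

#include <algorithm>
#include <array>
#include <cassert>

int main() {
  std::array<int, 2> X = {1, -2}, Y = {3, -4};

  // concat (sint_to_fp X), (sint_to_fp Y)
  std::array<float, 4> A;
  std::transform(X.begin(), X.end(), A.begin(), [](int v) { return float(v); });
  std::transform(Y.begin(), Y.end(), A.begin() + 2, [](int v) { return float(v); });

  // sint_to_fp (concat X, Y)
  std::array<int, 4> XY;
  std::copy(X.begin(), X.end(), XY.begin());
  std::copy(Y.begin(), Y.end(), XY.begin() + 2);
  std::array<float, 4> B;
  std::transform(XY.begin(), XY.end(), B.begin(), [](int v) { return float(v); });

  assert(A == B); // same lanes either way
  return 0;
}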
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
// If we only have one input vector, we don't need to do any concatenation.
if (N->getNumOperands() == 1)
@@ -18256,6 +19051,9 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
return V;
+ if (SDValue V = combineConcatVectorOfCasts(N, DAG))
+ return V;
+
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
// nodes often generate nop CONCAT_VECTOR nodes.
// Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
@@ -18287,14 +19085,9 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return SDValue();
}
- auto *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- // The extract index must be constant.
- if (!CS)
- return SDValue();
-
// Check that we are reading from the identity index.
unsigned IdentityIndex = i * PartNumElem;
- if (CS->getAPIntValue() != IdentityIndex)
+ if (Op.getConstantOperandAPInt(1) != IdentityIndex)
return SDValue();
}
@@ -18377,6 +19170,15 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
return SDValue();
+ // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
+ // reduced to the unary fneg when it is visited, and we probably want to deal
+ // with fneg in a target-specific way.
+ if (BOpcode == ISD::FSUB) {
+ auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
+ if (C && C->getValueAPF().isNegZero())
+ return SDValue();
+ }
+
// The binop must be a vector type, so we can extract some fraction of it.
EVT WideBVT = BinOp.getValueType();
if (!WideBVT.isVector())
@@ -18412,12 +19214,11 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// bitcasted.
unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
- EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
// extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
SDLoc DL(Extract);
- SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
+ SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
BinOp.getOperand(0), NewExtIndex);
SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
@@ -18457,7 +19258,7 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
// extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
SDLoc DL(Extract);
- SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
+ SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
: DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
BinOp.getOperand(0), IndexC);
@@ -18489,6 +19290,26 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
// Allow targets to opt-out.
EVT VT = Extract->getValueType(0);
+
+ // We can only create byte sized loads.
+ if (!VT.isByteSized())
+ return SDValue();
+
+ unsigned Index = ExtIdx->getZExtValue();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // If the index is a multiple of the extract element count, we can offset the
+ // address by the store size multiplied by the subvector index. Otherwise if
+ // the scalar type is byte sized, we can just use the index multiplied by
+ // the element size in bytes as the offset.
+ unsigned Offset;
+ if (Index % NumElts == 0)
+ Offset = (Index / NumElts) * VT.getStoreSize();
+ else if (VT.getScalarType().isByteSized())
+ Offset = Index * VT.getScalarType().getStoreSize();
+ else
+ return SDValue();
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
return SDValue();
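The offset selection added above is pure arithmetic, so a worked standalone sketch may help (hypothetical element sizes and indices; not LLVM API code): for a whole-subvector extract the offset is the subvector index times the subvector store size, otherwise it falls back to the element index times the element store size when elements are byte sized.

#include <cassert>
#include <cstdio>

// Offset (in bytes) for extracting an NumElts-wide subvector of EltBytes-sized
// elements at element index Index, mirroring the logic in the hunk above.
// Returns -1 when the combine would bail out.
static long subvectorOffset(unsigned Index, unsigned NumElts, unsigned EltBytes,
                            bool EltIsByteSized) {
  unsigned StoreSize = NumElts * EltBytes; // store size of the extracted type
  if (Index % NumElts == 0)
    return long(Index / NumElts) * StoreSize;
  if (EltIsByteSized)
    return long(Index) * EltBytes;
  return -1;
}

int main() {
  // A v2f32 extracted at element index 2 starts one whole subvector past the
  // base: (2 / 2) * 8 == 8 bytes.
  assert(subvectorOffset(/*Index=*/2, /*NumElts=*/2, /*EltBytes=*/4, true) == 8);
  // A misaligned start (index 3) with byte-sized elements: 3 * 4 == 12 bytes.
  assert(subvectorOffset(3, 2, 4, true) == 12);
  std::printf("offsets: %ld and %ld\n", subvectorOffset(2, 2, 4, true),
              subvectorOffset(3, 2, 4, true));
  return 0;
}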
@@ -18496,8 +19317,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
// The narrow load will be offset from the base address of the old load if
// we are extracting from something besides index 0 (little-endian).
SDLoc DL(Extract);
- SDValue BaseAddr = Ld->getOperand(1);
- unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
+ SDValue BaseAddr = Ld->getBasePtr();
// TODO: Use "BaseIndexOffset" to make this more effective.
SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
@@ -18512,6 +19332,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
EVT NVT = N->getValueType(0);
SDValue V = N->getOperand(0);
+ uint64_t ExtIdx = N->getConstantOperandVal(1);
// Extract from UNDEF is UNDEF.
if (V.isUndef())
@@ -18523,9 +19344,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// Combine an extract of an extract into a single extract_subvector.
// ext (ext X, C), 0 --> ext X, C
- SDValue Index = N->getOperand(1);
- if (isNullConstant(Index) && V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- V.hasOneUse() && isa<ConstantSDNode>(V.getOperand(1))) {
+ if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
V.getConstantOperandVal(1)) &&
TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
@@ -18536,21 +19355,20 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// Try to move vector bitcast after extract_subv by scaling extraction index:
// extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
- if (isa<ConstantSDNode>(Index) && V.getOpcode() == ISD::BITCAST &&
+ if (V.getOpcode() == ISD::BITCAST &&
V.getOperand(0).getValueType().isVector()) {
SDValue SrcOp = V.getOperand(0);
EVT SrcVT = SrcOp.getValueType();
- unsigned SrcNumElts = SrcVT.getVectorNumElements();
- unsigned DestNumElts = V.getValueType().getVectorNumElements();
+ unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
+ unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
if ((SrcNumElts % DestNumElts) == 0) {
unsigned SrcDestRatio = SrcNumElts / DestNumElts;
- unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio;
+ ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
- NewExtNumElts);
+ NewExtEC);
if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
- unsigned IndexValScaled = N->getConstantOperandVal(1) * SrcDestRatio;
SDLoc DL(N);
- SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
+ SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
V.getOperand(0), NewIndex);
return DAG.getBitcast(NVT, NewExtract);
@@ -18558,34 +19376,43 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
}
if ((DestNumElts % SrcNumElts) == 0) {
unsigned DestSrcRatio = DestNumElts / SrcNumElts;
- if ((NVT.getVectorNumElements() % DestSrcRatio) == 0) {
- unsigned NewExtNumElts = NVT.getVectorNumElements() / DestSrcRatio;
- EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(),
- SrcVT.getScalarType(), NewExtNumElts);
- if ((N->getConstantOperandVal(1) % DestSrcRatio) == 0 &&
- TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
- unsigned IndexValScaled = N->getConstantOperandVal(1) / DestSrcRatio;
+ if ((NVT.getVectorMinNumElements() % DestSrcRatio) == 0) {
+ ElementCount NewExtEC = NVT.getVectorElementCount() / DestSrcRatio;
+ EVT ScalarVT = SrcVT.getScalarType();
+ if ((ExtIdx % DestSrcRatio) == 0) {
SDLoc DL(N);
- SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
- SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
- V.getOperand(0), NewIndex);
- return DAG.getBitcast(NVT, NewExtract);
+ unsigned IndexValScaled = ExtIdx / DestSrcRatio;
+ EVT NewExtVT =
+ EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
+ if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
+ SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
+ SDValue NewExtract =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
+ V.getOperand(0), NewIndex);
+ return DAG.getBitcast(NVT, NewExtract);
+ }
+ if (NewExtEC == 1 &&
+ TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
+ SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
+ SDValue NewExtract =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
+ V.getOperand(0), NewIndex);
+ return DAG.getBitcast(NVT, NewExtract);
+ }
}
}
}
}
- if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index)) {
+ if (V.getOpcode() == ISD::CONCAT_VECTORS) {
+ unsigned ExtNumElts = NVT.getVectorMinNumElements();
EVT ConcatSrcVT = V.getOperand(0).getValueType();
assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
"Concat and extract subvector do not change element type");
-
- unsigned ExtIdx = N->getConstantOperandVal(1);
- unsigned ExtNumElts = NVT.getVectorNumElements();
- assert(ExtIdx % ExtNumElts == 0 &&
+ assert((ExtIdx % ExtNumElts) == 0 &&
"Extract index is not a multiple of the input vector length.");
- unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorNumElements();
+ unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
// If the concatenated source types match this extract, it's a direct
@@ -18599,15 +19426,14 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// concat operand. Example:
// v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->
// v2i8 extract_subvec v8i8 Y, 6
- if (ConcatSrcNumElts % ExtNumElts == 0) {
+ if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
SDLoc DL(N);
unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
"Trying to extract from >1 concat operand?");
assert(NewExtIdx % ExtNumElts == 0 &&
"Extract index is not a multiple of the input vector length.");
- MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
- SDValue NewIndexC = DAG.getConstant(NewExtIdx, DL, IdxTy);
+ SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
V.getOperand(ConcatOpIdx), NewIndexC);
}
@@ -18617,37 +19443,33 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// If the input is a build vector. Try to make a smaller build vector.
if (V.getOpcode() == ISD::BUILD_VECTOR) {
- if (auto *IdxC = dyn_cast<ConstantSDNode>(Index)) {
- EVT InVT = V.getValueType();
- unsigned ExtractSize = NVT.getSizeInBits();
- unsigned EltSize = InVT.getScalarSizeInBits();
- // Only do this if we won't split any elements.
- if (ExtractSize % EltSize == 0) {
- unsigned NumElems = ExtractSize / EltSize;
- EVT EltVT = InVT.getVectorElementType();
- EVT ExtractVT = NumElems == 1 ? EltVT
- : EVT::getVectorVT(*DAG.getContext(),
- EltVT, NumElems);
- if ((Level < AfterLegalizeDAG ||
- (NumElems == 1 ||
- TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
- (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
- unsigned IdxVal = IdxC->getZExtValue();
- IdxVal *= NVT.getScalarSizeInBits();
- IdxVal /= EltSize;
-
- if (NumElems == 1) {
- SDValue Src = V->getOperand(IdxVal);
- if (EltVT != Src.getValueType())
- Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
- return DAG.getBitcast(NVT, Src);
- }
-
- // Extract the pieces from the original build_vector.
- SDValue BuildVec = DAG.getBuildVector(
- ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems));
- return DAG.getBitcast(NVT, BuildVec);
+ EVT InVT = V.getValueType();
+ unsigned ExtractSize = NVT.getSizeInBits();
+ unsigned EltSize = InVT.getScalarSizeInBits();
+ // Only do this if we won't split any elements.
+ if (ExtractSize % EltSize == 0) {
+ unsigned NumElems = ExtractSize / EltSize;
+ EVT EltVT = InVT.getVectorElementType();
+ EVT ExtractVT =
+ NumElems == 1 ? EltVT
+ : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
+ if ((Level < AfterLegalizeDAG ||
+ (NumElems == 1 ||
+ TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
+ (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
+ unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
+
+ if (NumElems == 1) {
+ SDValue Src = V->getOperand(IdxVal);
+ if (EltVT != Src.getValueType())
+ Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
+ return DAG.getBitcast(NVT, Src);
}
+
+ // Extract the pieces from the original build_vector.
+ SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
+ V->ops().slice(IdxVal, NumElems));
+ return DAG.getBitcast(NVT, BuildVec);
}
}
}
@@ -18659,23 +19481,19 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
if (!NVT.bitsEq(SmallVT))
return SDValue();
- // Only handle cases where both indexes are constants.
- auto *ExtIdx = dyn_cast<ConstantSDNode>(Index);
- auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
- if (InsIdx && ExtIdx) {
- // Combine:
- // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
- // Into:
- // indices are equal or bit offsets are equal => V1
- // otherwise => (extract_subvec V1, ExtIdx)
- if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
- ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
- return DAG.getBitcast(NVT, V.getOperand(1));
- return DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
- DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
- Index);
- }
+ // Combine:
+ // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
+ // Into:
+ // indices are equal or bit offsets are equal => V1
+ // otherwise => (extract_subvec V1, ExtIdx)
+ uint64_t InsIdx = V.getConstantOperandVal(2);
+ if (InsIdx * SmallVT.getScalarSizeInBits() ==
+ ExtIdx * NVT.getScalarSizeInBits())
+ return DAG.getBitcast(NVT, V.getOperand(1));
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
+ DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
+ N->getOperand(1));
}
if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
@@ -19064,6 +19882,57 @@ static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
NewMask);
}
+/// Combine shuffle of shuffle of the form:
+/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
+static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
+ SelectionDAG &DAG) {
+ if (!OuterShuf->getOperand(1).isUndef())
+ return SDValue();
+ auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
+ if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
+ return SDValue();
+
+ ArrayRef<int> OuterMask = OuterShuf->getMask();
+ ArrayRef<int> InnerMask = InnerShuf->getMask();
+ unsigned NumElts = OuterMask.size();
+ assert(NumElts == InnerMask.size() && "Mask length mismatch");
+ SmallVector<int, 32> CombinedMask(NumElts, -1);
+ int SplatIndex = -1;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ // Undef lanes remain undef.
+ int OuterMaskElt = OuterMask[i];
+ if (OuterMaskElt == -1)
+ continue;
+
+ // Peek through the shuffle masks to get the underlying source element.
+ int InnerMaskElt = InnerMask[OuterMaskElt];
+ if (InnerMaskElt == -1)
+ continue;
+
+ // Initialize the splatted element.
+ if (SplatIndex == -1)
+ SplatIndex = InnerMaskElt;
+
+ // Non-matching index - this is not a splat.
+ if (SplatIndex != InnerMaskElt)
+ return SDValue();
+
+ CombinedMask[i] = InnerMaskElt;
+ }
+ assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
+ getSplatIndex(CombinedMask) != -1) &&
+ "Expected a splat mask");
+
+ // TODO: The transform may be a win even if the mask is not legal.
+ EVT VT = OuterShuf->getValueType(0);
+ assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
+ if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
+ return SDValue();
+
+ return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
+ InnerShuf->getOperand(1), CombinedMask);
+}
+
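The mask composition in formSplatFromShuffles can be exercised on its own. The sketch below (standalone, hypothetical 4-lane masks with -1 for undef, not LLVM API code) peeks through an inner mask with an outer mask and reports whether every defined lane lands on the same source element.

#include <cstdio>
#include <vector>

// Compose OuterMask over InnerMask (both shuffles have an undef second
// operand) and return the single source index they splat, or -1 if the
// combined mask is not a splat. Mirrors the loop in formSplatFromShuffles.
static int composedSplatIndex(const std::vector<int> &OuterMask,
                              const std::vector<int> &InnerMask) {
  int SplatIndex = -1;
  for (int OuterElt : OuterMask) {
    if (OuterElt == -1)
      continue;                       // undef lane stays undef
    int InnerElt = InnerMask[OuterElt];
    if (InnerElt == -1)
      continue;
    if (SplatIndex == -1)
      SplatIndex = InnerElt;          // first defined lane picks the element
    else if (SplatIndex != InnerElt)
      return -1;                      // two different sources: not a splat
  }
  return SplatIndex;
}

int main() {
  // shuf (shuf X, undef, <1,1,3,3>), undef, <0,-1,1,0> --> splat of X[1]
  std::printf("splat index: %d\n",
              composedSplatIndex({0, -1, 1, 0}, {1, 1, 3, 3}));
  // An outer mask mixing lanes 0 and 2 of the inner result is not a splat.
  std::printf("splat index: %d\n",
              composedSplatIndex({0, 2, 0, 0}, {1, 1, 3, 3}));
  return 0;
}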
/// If the shuffle mask is taking exactly one element from the first vector
/// operand and passing through all other elements from the second vector
/// operand, return the index of the mask element that is choosing an element
@@ -19136,8 +20005,7 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
// element used. Therefore, our new insert element occurs at the shuffle's
// mask index value, not the insert's index value.
// shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
- SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
- Op0.getOperand(2).getValueType());
+ SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
Op1, Op0.getOperand(1), NewInsIndex);
}
@@ -19223,6 +20091,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
return V;
+ if (SDValue V = formSplatFromShuffles(SVN, DAG))
+ return V;
+
// If it is a splat, check if the argument vector is another splat or a
// build_vector.
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
@@ -19234,7 +20105,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
SDValue L = N0.getOperand(0), R = N0.getOperand(1);
SDLoc DL(N);
EVT EltVT = VT.getScalarType();
- SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL);
+ SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
@@ -19354,16 +20225,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
N1.isUndef() && Level < AfterLegalizeVectorOps &&
TLI.isTypeLegal(VT)) {
- auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
- if (Scale == 1)
- return SmallVector<int, 8>(Mask.begin(), Mask.end());
-
- SmallVector<int, 8> NewMask;
- for (int M : Mask)
- for (int s = 0; s != Scale; ++s)
- NewMask.push_back(M < 0 ? -1 : Scale * M + s);
- return NewMask;
- };
SDValue BC0 = peekThroughOneUseBitcasts(N0);
if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
@@ -19383,10 +20244,10 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// Scale the shuffle masks to the smaller scalar type.
ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
- SmallVector<int, 8> InnerMask =
- ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
- SmallVector<int, 8> OuterMask =
- ScaleShuffleMask(SVN->getMask(), OuterScale);
+ SmallVector<int, 8> InnerMask;
+ SmallVector<int, 8> OuterMask;
+ narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
+ narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
// Merge the shuffle masks.
SmallVector<int, 8> NewMask;
@@ -19547,7 +20408,9 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
// Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
// with a VECTOR_SHUFFLE and possible truncate.
- if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ VT.isFixedLengthVector() &&
+ InVal->getOperand(0).getValueType().isFixedLengthVector()) {
SDValue InVec = InVal->getOperand(0);
SDValue EltNo = InVal->getOperand(1);
auto InVecT = InVec.getValueType();
@@ -19576,11 +20439,10 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
return LegalShuffle;
// If not we must truncate the vector.
if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
- MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
- SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
- EVT SubVT =
- EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
- VT.getVectorNumElements());
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
+ EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
+ InVecT.getVectorElementType(),
+ VT.getVectorNumElements());
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
LegalShuffle, ZeroIdx);
}
@@ -19597,6 +20459,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
+ uint64_t InsIdx = N->getConstantOperandVal(2);
// If inserting an UNDEF, just return the original vector.
if (N1.isUndef())
@@ -19657,11 +20520,6 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
N1.getOperand(1), N2);
- if (!isa<ConstantSDNode>(N2))
- return SDValue();
-
- uint64_t InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
-
// Push subvector bitcasts to the output, adjusting the index as we go.
// insert_subvector(bitcast(v), bitcast(s), c1)
// -> bitcast(insert_subvector(v, s, c2))
@@ -19676,19 +20534,18 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
EVT NewVT;
SDLoc DL(N);
SDValue NewIdx;
- MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
LLVMContext &Ctx = *DAG.getContext();
unsigned NumElts = VT.getVectorNumElements();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
- NewIdx = DAG.getConstant(InsIdx * Scale, DL, IdxVT);
+ NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
} else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
- NewIdx = DAG.getConstant(InsIdx / Scale, DL, IdxVT);
+ NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
}
}
if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
@@ -19704,8 +20561,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
// (insert_subvector (insert_subvector A, Idx0), Idx1)
// -> (insert_subvector (insert_subvector A, Idx1), Idx0)
if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
- N1.getValueType() == N0.getOperand(1).getValueType() &&
- isa<ConstantSDNode>(N0.getOperand(2))) {
+ N1.getValueType() == N0.getOperand(1).getValueType()) {
unsigned OtherIdx = N0.getConstantOperandVal(2);
if (InsIdx < OtherIdx) {
// Swap nodes.
@@ -19722,10 +20578,8 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
N0.getOperand(0).getValueType() == N1.getValueType()) {
unsigned Factor = N1.getValueType().getVectorNumElements();
-
SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
- Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
-
+ Ops[InsIdx / Factor] = N1;
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
@@ -19769,9 +20623,9 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
// VECREDUCE over 1-element vector is just an extract.
if (VT.getVectorNumElements() == 1) {
SDLoc dl(N);
- SDValue Res = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Res =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
+ DAG.getVectorIdxConstant(0, dl));
if (Res.getValueType() != N->getValueType(0))
Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
return Res;
@@ -19904,10 +20758,9 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
return SDValue();
SDLoc DL(N);
- SDValue IndexC =
- DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
- SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC);
- SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC);
+ SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
+ SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
+ SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
// If all lanes but 1 are undefined, no need to splat the scalar result.
@@ -19937,6 +20790,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
SDValue Ops[] = {LHS, RHS};
EVT VT = N->getValueType(0);
unsigned Opcode = N->getOpcode();
+ SDNodeFlags Flags = N->getFlags();
// See if we can constant fold the vector operation.
if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
@@ -19960,10 +20814,37 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
(LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
SDLoc DL(N);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
- RHS.getOperand(0), N->getFlags());
+ RHS.getOperand(0), Flags);
SDValue UndefV = LHS.getOperand(1);
return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
}
+
+ // Try to sink a splat shuffle after a binop with a uniform constant.
+ // This is limited to cases where neither the shuffle nor the constant have
+ // undefined elements because that could be poison-unsafe or inhibit
+ // demanded elements analysis. It is further limited to not change a splat
+ // of an inserted scalar because that may be optimized better by
+ // load-folding or other target-specific behaviors.
+ if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
+ Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
+ Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
+ // binop (splat X), (splat C) --> splat (binop X, C)
+ SDLoc DL(N);
+ SDValue X = Shuf0->getOperand(0);
+ SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
+ return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
+ Shuf0->getMask());
+ }
+ if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
+ Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
+ Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
+ // binop (splat C), (splat X) --> splat (binop C, X)
+ SDLoc DL(N);
+ SDValue X = Shuf1->getOperand(0);
+ SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
+ return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
+ Shuf1->getMask());
+ }
}
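The splat-sinking rule added above amounts to doing the scalar arithmetic once and broadcasting the result. Below is a standalone sketch (hypothetical 4-lane integer vectors, addition standing in for the binop; not DAG code) comparing binop(splat X, splat C) with splat(binop X, C).

#include <array>
#include <cassert>

int main() {
  const int X = 7, C = 3;

  // binop (splat X), (splat C): splat both operands, then add lane-wise.
  std::array<int, 4> SplatX, SplatC, LaneWise;
  SplatX.fill(X);
  SplatC.fill(C);
  for (unsigned i = 0; i != 4; ++i)
    LaneWise[i] = SplatX[i] + SplatC[i];

  // splat (binop X, C): do the scalar add once, then broadcast the result.
  std::array<int, 4> Sunk;
  Sunk.fill(X + C);

  assert(LaneWise == Sunk); // identical vectors, one add instead of four
  return 0;
}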
// The following pattern is likely to emerge with vector reduction ops. Moving
@@ -20361,8 +21242,8 @@ SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
// Create a ConstantArray of the two constants.
Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
- TD.getPrefTypeAlignment(FPTy));
- unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ TD.getPrefTypeAlign(FPTy));
+ Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
// Get offsets to the 0 and 1 elements of the array, so we can select between
// them.
@@ -20797,7 +21678,10 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
EVT CCVT = getSetCCResultType(VT);
ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
DenormalMode DenormMode = DAG.getDenormalMode(VT);
- if (DenormMode == DenormalMode::IEEE) {
+ if (DenormMode.Input == DenormalMode::IEEE) {
+ // This is specifically a check for the handling of denormal inputs,
+ // not the result.
+
// fabs(X) < SmallestNormal ? 0.0 : Est
const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
@@ -20849,9 +21733,11 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
: (LSN->getAddressingMode() == ISD::PRE_DEC)
? -1 * C->getSExtValue()
: 0;
+ uint64_t Size =
+ MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
Offset /*base offset*/,
- Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
+ Optional<int64_t>(Size),
LSN->getMemOperand()};
}
if (const auto *LN = cast<LifetimeSDNode>(N))
@@ -20911,21 +21797,24 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
// If we know required SrcValue1 and SrcValue2 have relatively large
// alignment compared to the size and offset of the access, we may be able
// to prove they do not alias. This check is conservative for now to catch
- // cases created by splitting vector types.
+  // cases created by splitting vector types; it only works when the offsets
+  // are multiples of the size of the data.
int64_t SrcValOffset0 = MUC0.MMO->getOffset();
int64_t SrcValOffset1 = MUC1.MMO->getOffset();
- unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment();
- unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment();
+ Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
+ Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
+ auto &Size0 = MUC0.NumBytes;
+ auto &Size1 = MUC1.NumBytes;
if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
- MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() &&
- *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) {
- int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
- int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
+ Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
+ OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
+ SrcValOffset1 % *Size1 == 0) {
+ int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
+ int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
// There is no overlap between these relatively aligned accesses of
// similar size. Return no alias.
- if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 ||
- (OffAlign1 + *MUC1.NumBytes) <= OffAlign0)
+ if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
return false;
}
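The strengthened no-alias check can be spelled out with concrete numbers. The sketch below (standalone, hypothetical sizes and offsets; not LLVM API code) mirrors the updated condition: equal base alignments greater than the access size, offsets that are multiples of the size, and offset-within-alignment intervals that do not overlap.

#include <cassert>
#include <cstdint>

// Mirror of the no-alias test in the hunk above for two accesses of equal
// size from bases with the same (large) alignment.
static bool provablyNoAlias(int64_t Off0, int64_t Off1, uint64_t Size,
                            uint64_t BaseAlign) {
  if (Off0 == Off1 || BaseAlign <= Size)
    return false;
  if (Off0 % int64_t(Size) != 0 || Off1 % int64_t(Size) != 0)
    return false;
  int64_t OffAlign0 = Off0 % int64_t(BaseAlign);
  int64_t OffAlign1 = Off1 % int64_t(BaseAlign);
  return (OffAlign0 + int64_t(Size)) <= OffAlign1 ||
         (OffAlign1 + int64_t(Size)) <= OffAlign0;
}

int main() {
  // Two 8-byte accesses at offsets 0 and 8 from 32-byte-aligned bases cannot
  // overlap: their within-alignment intervals are [0,8) and [8,16).
  assert(provablyNoAlias(0, 8, 8, 32));
  // Offsets 0 and 4 with 8-byte accesses are not both multiples of the size,
  // so the check conservatively declines to prove anything.
  assert(!provablyNoAlias(0, 4, 8, 32));
  return 0;
}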
@@ -20938,11 +21827,12 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
UseAA = false;
#endif
- if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) {
+ if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
+ Size0.hasValue() && Size1.hasValue()) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
- int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset;
- int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset;
+ int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
+ int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
AliasResult AAResult = AA->alias(
MemoryLocation(MUC0.MMO->getValue(), Overlap0,
UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
@@ -21099,10 +21989,10 @@ bool operator!=(const UnitT &, const UnitT &) { return false; }
// redundant, as this function gets called when visiting every store
// node, so why not let the work be done on each store as it's visited?
//
-// I believe this is mainly important because MergeConsecutiveStores
+// I believe this is mainly important because mergeConsecutiveStores
// is unable to deal with merging stores of different sizes, so unless
// we improve the chains of all the potential candidates up-front
-// before running MergeConsecutiveStores, it might only see some of
+// before running mergeConsecutiveStores, it might only see some of
// the nodes that will eventually be candidates, and then not be able
// to go from a partially-merged state to the desired final
// fully-merged state.
@@ -21131,6 +22021,12 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
if (BasePtr.getBase().isUndef())
return false;
+ // BaseIndexOffset assumes that offsets are fixed-size, which
+ // is not valid for scalable vectors where the offsets are
+ // scaled by `vscale`, so bail out early.
+ if (St->getMemoryVT().isScalableVector())
+ return false;
+
// Add ST's interval.
Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 2bec8613e79c4..fc6c3a145f132 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -68,7 +68,6 @@
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -156,7 +155,7 @@ bool FastISel::lowerArguments() {
for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(),
E = FuncInfo.Fn->arg_end();
I != E; ++I) {
- DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(&*I);
+ DenseMap<const Value *, Register>::iterator VI = LocalValueMap.find(&*I);
assert(VI != LocalValueMap.end() && "Missed an argument?");
FuncInfo.ValueMap[&*I] = VI->second;
}
@@ -165,8 +164,8 @@ bool FastISel::lowerArguments() {
/// Return the defined register if this instruction defines exactly one
/// virtual register and uses no other virtual registers. Otherwise return 0.
-static unsigned findSinkableLocalRegDef(MachineInstr &MI) {
- unsigned RegDef = 0;
+static Register findSinkableLocalRegDef(MachineInstr &MI) {
+ Register RegDef;
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
@@ -174,9 +173,9 @@ static unsigned findSinkableLocalRegDef(MachineInstr &MI) {
if (RegDef)
return 0;
RegDef = MO.getReg();
- } else if (Register::isVirtualRegister(MO.getReg())) {
+ } else if (MO.getReg().isVirtual()) {
// This is another use of a vreg. Don't try to sink it.
- return 0;
+ return Register();
}
}
return RegDef;
@@ -202,7 +201,7 @@ void FastISel::flushLocalValueMap() {
bool Store = true;
if (!LocalMI.isSafeToMove(nullptr, Store))
continue;
- unsigned DefReg = findSinkableLocalRegDef(LocalMI);
+ Register DefReg = findSinkableLocalRegDef(LocalMI);
if (DefReg == 0)
continue;
@@ -217,7 +216,7 @@ void FastISel::flushLocalValueMap() {
LastFlushPoint = FuncInfo.InsertPt;
}
-static bool isRegUsedByPhiNodes(unsigned DefReg,
+static bool isRegUsedByPhiNodes(Register DefReg,
FunctionLoweringInfo &FuncInfo) {
for (auto &P : FuncInfo.PHINodesToUpdate)
if (P.second == DefReg)
@@ -225,6 +224,21 @@ static bool isRegUsedByPhiNodes(unsigned DefReg,
return false;
}
+static bool isTerminatingEHLabel(MachineBasicBlock *MBB, MachineInstr &MI) {
+ // Ignore non-EH labels.
+ if (!MI.isEHLabel())
+ return false;
+
+ // Any EH label outside a landing pad must be for an invoke. Consider it a
+ // terminator.
+ if (!MBB->isEHPad())
+ return true;
+
+ // If this is a landingpad, the first non-phi instruction will be an EH_LABEL.
+ // Don't consider that label to be a terminator.
+ return MI.getIterator() != MBB->getFirstNonPHI();
+}
+
/// Build a map of instruction orders. Return the first terminator and its
/// order. Consider EH_LABEL instructions to be terminators as well, since local
/// values for phis after invokes must be materialized before the call.
@@ -233,7 +247,7 @@ void FastISel::InstOrderMap::initialize(
unsigned Order = 0;
for (MachineInstr &I : *MBB) {
if (!FirstTerminator &&
- (I.isTerminator() || (I.isEHLabel() && &I != &MBB->front()))) {
+ (I.isTerminator() || isTerminatingEHLabel(MBB, I))) {
FirstTerminator = &I;
FirstTerminatorOrder = Order;
}
@@ -246,7 +260,7 @@ void FastISel::InstOrderMap::initialize(
}
void FastISel::sinkLocalValueMaterialization(MachineInstr &LocalMI,
- unsigned DefReg,
+ Register DefReg,
InstOrderMap &OrderMap) {
// If this register is used by a register fixup, MRI will not contain all
// the uses until after register fixups, so don't attempt to sink or DCE
@@ -341,7 +355,7 @@ bool FastISel::hasTrivialKill(const Value *V) {
// Even the value might have only one use in the LLVM IR, it is possible that
// FastISel might fold the use into another instruction and now there is more
// than one use at the Machine Instruction level.
- unsigned Reg = lookUpRegForValue(V);
+ Register Reg = lookUpRegForValue(V);
if (Reg && !MRI.use_empty(Reg))
return false;
@@ -359,11 +373,11 @@ bool FastISel::hasTrivialKill(const Value *V) {
cast<Instruction>(*I->user_begin())->getParent() == I->getParent();
}
-unsigned FastISel::getRegForValue(const Value *V) {
+Register FastISel::getRegForValue(const Value *V) {
EVT RealVT = TLI.getValueType(DL, V->getType(), /*AllowUnknown=*/true);
// Don't handle non-simple values in FastISel.
if (!RealVT.isSimple())
- return 0;
+ return Register();
// Ignore illegal types. We must do this before looking up the value
// in ValueMap because Arguments are given virtual registers regardless
@@ -374,11 +388,11 @@ unsigned FastISel::getRegForValue(const Value *V) {
if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
else
- return 0;
+ return Register();
}
// Look up the value to see if we already have a register for it.
- unsigned Reg = lookUpRegForValue(V);
+ Register Reg = lookUpRegForValue(V);
if (Reg)
return Reg;
@@ -400,8 +414,8 @@ unsigned FastISel::getRegForValue(const Value *V) {
return Reg;
}
-unsigned FastISel::materializeConstant(const Value *V, MVT VT) {
- unsigned Reg = 0;
+Register FastISel::materializeConstant(const Value *V, MVT VT) {
+ Register Reg;
if (const auto *CI = dyn_cast<ConstantInt>(V)) {
if (CI->getValue().getActiveBits() <= 64)
Reg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
@@ -428,9 +442,9 @@ unsigned FastISel::materializeConstant(const Value *V, MVT VT) {
bool isExact;
(void)Flt.convertToInteger(SIntVal, APFloat::rmTowardZero, &isExact);
if (isExact) {
- unsigned IntegerReg =
+ Register IntegerReg =
getRegForValue(ConstantInt::get(V->getContext(), SIntVal));
- if (IntegerReg != 0)
+ if (IntegerReg)
Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg,
/*Kill=*/false);
}
@@ -452,8 +466,8 @@ unsigned FastISel::materializeConstant(const Value *V, MVT VT) {
/// Helper for getRegForValue. This function is called when the value isn't
/// already available in a register and must be materialized with new
/// instructions.
-unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
- unsigned Reg = 0;
+Register FastISel::materializeRegForValue(const Value *V, MVT VT) {
+ Register Reg;
// Give the target-specific code a try first.
if (isa<Constant>(V))
Reg = fastMaterializeConstant(cast<Constant>(V));
@@ -472,25 +486,25 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
return Reg;
}
-unsigned FastISel::lookUpRegForValue(const Value *V) {
+Register FastISel::lookUpRegForValue(const Value *V) {
// Look up the value to see if we already have a register for it. We
// cache values defined by Instructions across blocks, and other values
// only locally. This is because Instructions already have the SSA
// def-dominates-use requirement enforced.
- DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
+ DenseMap<const Value *, Register>::iterator I = FuncInfo.ValueMap.find(V);
if (I != FuncInfo.ValueMap.end())
return I->second;
return LocalValueMap[V];
}
-void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
+void FastISel::updateValueMap(const Value *I, Register Reg, unsigned NumRegs) {
if (!isa<Instruction>(I)) {
LocalValueMap[I] = Reg;
return;
}
- unsigned &AssignedReg = FuncInfo.ValueMap[I];
- if (AssignedReg == 0)
+ Register &AssignedReg = FuncInfo.ValueMap[I];
+ if (!AssignedReg)
// Use the new register.
AssignedReg = Reg;
else if (Reg != AssignedReg) {
@@ -504,11 +518,11 @@ void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
}
}
-std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
- unsigned IdxN = getRegForValue(Idx);
- if (IdxN == 0)
+std::pair<Register, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
+ Register IdxN = getRegForValue(Idx);
+ if (!IdxN)
// Unhandled operand. Halt "fast" selection and bail.
- return std::pair<unsigned, bool>(0, false);
+ return std::pair<Register, bool>(Register(), false);
bool IdxNIsKill = hasTrivialKill(Idx);
@@ -524,7 +538,7 @@ std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN, IdxNIsKill);
IdxNIsKill = true;
}
- return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
+ return std::pair<Register, bool>(IdxN, IdxNIsKill);
}
void FastISel::recomputeInsertPt() {
@@ -605,12 +619,12 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
// we don't have anything that canonicalizes operand order.
if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(0)))
if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) {
- unsigned Op1 = getRegForValue(I->getOperand(1));
+ Register Op1 = getRegForValue(I->getOperand(1));
if (!Op1)
return false;
bool Op1IsKill = hasTrivialKill(I->getOperand(1));
- unsigned ResultReg =
+ Register ResultReg =
fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, Op1IsKill,
CI->getZExtValue(), VT.getSimpleVT());
if (!ResultReg)
@@ -621,7 +635,7 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
return true;
}
- unsigned Op0 = getRegForValue(I->getOperand(0));
+ Register Op0 = getRegForValue(I->getOperand(0));
if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
return false;
bool Op0IsKill = hasTrivialKill(I->getOperand(0));
@@ -644,7 +658,7 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
ISDOpcode = ISD::AND;
}
- unsigned ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
+ Register ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
Op0IsKill, Imm, VT.getSimpleVT());
if (!ResultReg)
return false;
@@ -654,13 +668,13 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
return true;
}
- unsigned Op1 = getRegForValue(I->getOperand(1));
+ Register Op1 = getRegForValue(I->getOperand(1));
if (!Op1) // Unhandled operand. Halt "fast" selection and bail.
return false;
bool Op1IsKill = hasTrivialKill(I->getOperand(1));
// Now we have both operands in registers. Emit the instruction.
- unsigned ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
+ Register ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
ISDOpcode, Op0, Op0IsKill, Op1, Op1IsKill);
if (!ResultReg)
// Target-specific code wasn't able to find a machine opcode for
@@ -673,7 +687,7 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
}
bool FastISel::selectGetElementPtr(const User *I) {
- unsigned N = getRegForValue(I->getOperand(0));
+ Register N = getRegForValue(I->getOperand(0));
if (!N) // Unhandled operand. Halt "fast" selection and bail.
return false;
bool NIsKill = hasTrivialKill(I->getOperand(0));
@@ -729,8 +743,8 @@ bool FastISel::selectGetElementPtr(const User *I) {
// N = N + Idx * ElementSize;
uint64_t ElementSize = DL.getTypeAllocSize(Ty);
- std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
- unsigned IdxN = Pair.first;
+ std::pair<Register, bool> Pair = getRegForGEPIndex(Idx);
+ Register IdxN = Pair.first;
bool IdxNIsKill = Pair.second;
if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
return false;
@@ -778,7 +792,7 @@ bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
else
return false;
} else {
- unsigned Reg = getRegForValue(Val);
+ Register Reg = getRegForValue(Val);
if (!Reg)
return false;
Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
@@ -871,7 +885,6 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
Args.reserve(NumArgs);
// Populate the argument list.
- ImmutableCallSite CS(CI);
for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs; ArgI != ArgE; ++ArgI) {
Value *V = CI->getOperand(ArgI);
@@ -880,7 +893,7 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
ArgListEntry Entry;
Entry.Val = V;
Entry.Ty = V->getType();
- Entry.setAttributes(&CS, ArgI);
+ Entry.setAttributes(CI, ArgI);
Args.push_back(Entry);
}
@@ -987,7 +1000,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
// place these in any free register.
if (IsAnyRegCC) {
for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) {
- unsigned Reg = getRegForValue(I->getArgOperand(i));
+ Register Reg = getRegForValue(I->getArgOperand(i));
if (!Reg)
return false;
Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
@@ -1104,10 +1117,8 @@ bool FastISel::lowerCallTo(const CallInst *CI, const char *SymName,
bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol,
unsigned NumArgs) {
- ImmutableCallSite CS(CI);
-
- FunctionType *FTy = CS.getFunctionType();
- Type *RetTy = CS.getType();
+ FunctionType *FTy = CI->getFunctionType();
+ Type *RetTy = CI->getType();
ArgListTy Args;
Args.reserve(NumArgs);
@@ -1122,13 +1133,13 @@ bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol,
ArgListEntry Entry;
Entry.Val = V;
Entry.Ty = V->getType();
- Entry.setAttributes(&CS, ArgI);
+ Entry.setAttributes(CI, ArgI);
Args.push_back(Entry);
}
- TLI.markLibCallAttributes(MF, CS.getCallingConv(), Args);
+ TLI.markLibCallAttributes(MF, CI->getCallingConv(), Args);
CallLoweringInfo CLI;
- CLI.setCallee(RetTy, FTy, Symbol, std::move(Args), CS, NumArgs);
+ CLI.setCallee(RetTy, FTy, Symbol, std::move(Args), *CI, NumArgs);
return lowerCallTo(CLI);
}
@@ -1203,7 +1214,16 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
// the various CC lowering callbacks.
Flags.setByVal();
}
- if (Arg.IsByVal || Arg.IsInAlloca) {
+ if (Arg.IsPreallocated) {
+ Flags.setPreallocated();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // preallocated. This way we can know how many bytes we should've
+ // allocated and how many bytes a callee cleanup function will pop. If we
+ // port preallocated to more targets, we'll have to add custom
+ // preallocated handling in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ if (Arg.IsByVal || Arg.IsInAlloca || Arg.IsPreallocated) {
PointerType *Ty = cast<PointerType>(Arg.Ty);
Type *ElementTy = Ty->getElementType();
unsigned FrameSize =
@@ -1211,17 +1231,17 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
// For ByVal, alignment should come from FE. BE will guess if this info
// is not there, but there are cases it cannot get right.
- unsigned FrameAlign = Arg.Alignment;
+ MaybeAlign FrameAlign = Arg.Alignment;
if (!FrameAlign)
- FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL);
+ FrameAlign = Align(TLI.getByValTypeAlignment(ElementTy, DL));
Flags.setByValSize(FrameSize);
- Flags.setByValAlign(Align(FrameAlign));
+ Flags.setByValAlign(*FrameAlign);
}
if (Arg.IsNest)
Flags.setNest();
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
- Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty)));
+ Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty));
CLI.OutVals.push_back(Arg.Val);
CLI.OutFlags.push_back(Flags);
@@ -1234,29 +1254,26 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
assert(CLI.Call && "No call instruction specified.");
CLI.Call->setPhysRegsDeadExcept(CLI.InRegs, TRI);
- if (CLI.NumResultRegs && CLI.CS)
- updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs);
+ if (CLI.NumResultRegs && CLI.CB)
+ updateValueMap(CLI.CB, CLI.ResultReg, CLI.NumResultRegs);
// Set labels for heapallocsite call.
- if (CLI.CS)
- if (MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite"))
+ if (CLI.CB)
+ if (MDNode *MD = CLI.CB->getMetadata("heapallocsite"))
CLI.Call->setHeapAllocMarker(*MF, MD);
return true;
}
bool FastISel::lowerCall(const CallInst *CI) {
- ImmutableCallSite CS(CI);
-
- FunctionType *FuncTy = CS.getFunctionType();
- Type *RetTy = CS.getType();
+ FunctionType *FuncTy = CI->getFunctionType();
+ Type *RetTy = CI->getType();
ArgListTy Args;
ArgListEntry Entry;
- Args.reserve(CS.arg_size());
+ Args.reserve(CI->arg_size());
- for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
- i != e; ++i) {
+ for (auto i = CI->arg_begin(), e = CI->arg_end(); i != e; ++i) {
Value *V = *i;
// Skip empty types
@@ -1267,14 +1284,14 @@ bool FastISel::lowerCall(const CallInst *CI) {
Entry.Ty = V->getType();
// Skip the first return-type Attribute to get to params.
- Entry.setAttributes(&CS, i - CS.arg_begin());
+ Entry.setAttributes(CI, i - CI->arg_begin());
Args.push_back(Entry);
}
// Check if target-independent constraints permit a tail call here.
// Target-dependent constraints are checked within fastLowerCall.
bool IsTailCall = CI->isTailCall();
- if (IsTailCall && !isInTailCallPosition(CS, TM))
+ if (IsTailCall && !isInTailCallPosition(*CI, TM))
IsTailCall = false;
if (IsTailCall && MF->getFunction()
.getFnAttribute("disable-tail-calls")
@@ -1282,7 +1299,7 @@ bool FastISel::lowerCall(const CallInst *CI) {
IsTailCall = false;
CallLoweringInfo CLI;
- CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), CS)
+ CLI.setCallee(RetTy, FuncTy, CI->getCalledOperand(), std::move(Args), *CI)
.setTailCall(IsTailCall);
return lowerCallTo(CLI);
@@ -1292,7 +1309,7 @@ bool FastISel::selectCall(const User *I) {
const CallInst *Call = cast<CallInst>(I);
// Handle simple inline asms.
- if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledValue())) {
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledOperand())) {
// If the inline asm has side effects, then make sure that no local value
// lives across by flushing the local value map.
if (IA->hasSideEffects())
@@ -1307,12 +1324,19 @@ bool FastISel::selectCall(const User *I) {
ExtraInfo |= InlineAsm::Extra_HasSideEffects;
if (IA->isAlignStack())
ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+ if (Call->isConvergent())
+ ExtraInfo |= InlineAsm::Extra_IsConvergent;
ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::INLINEASM))
- .addExternalSymbol(IA->getAsmString().c_str())
- .addImm(ExtraInfo);
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::INLINEASM));
+ MIB.addExternalSymbol(IA->getAsmString().c_str());
+ MIB.addImm(ExtraInfo);
+
+ const MDNode *SrcLoc = Call->getMetadata("srcloc");
+ if (SrcLoc)
+ MIB.addMetadata(SrcLoc);
+
return true;
}
@@ -1350,13 +1374,15 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
assert(DI->getVariable() && "Missing variable");
if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
- LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI
+ << " (!hasDebugInfo)\n");
return true;
}
const Value *Address = DI->getAddress();
if (!Address || isa<UndefValue>(Address)) {
- LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI
+ << " (bad/undef address)\n");
return true;
}
@@ -1368,7 +1394,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
return true;
Optional<MachineOperand> Op;
- if (unsigned Reg = lookUpRegForValue(Address))
+ if (Register Reg = lookUpRegForValue(Address))
Op = MachineOperand::CreateReg(Reg, false);
// If we have a VLA that has a "use" in a metadata node that's then used
@@ -1393,15 +1419,14 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
"Expected inlined-at fields to agree");
// A dbg.declare describes the address of a source variable, so lower it
// into an indirect DBG_VALUE.
- auto *Expr = DI->getExpression();
- Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ false,
- *Op, DI->getVariable(), Expr);
+ TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true,
+ *Op, DI->getVariable(), DI->getExpression());
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
- LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI
+ << " (no materialized reg for address)\n");
}
return true;
}
@@ -1412,38 +1437,37 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
const Value *V = DI->getValue();
assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
"Expected inlined-at fields to agree");
- if (!V) {
+ if (!V || isa<UndefValue>(V)) {
// Currently the optimizer can produce this; insert an undef to
- // help debugging. Probably the optimizer should not do this.
+ // help debugging.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U,
DI->getVariable(), DI->getExpression());
} else if (const auto *CI = dyn_cast<ConstantInt>(V)) {
if (CI->getBitWidth() > 64)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addCImm(CI)
- .addReg(0U)
+ .addImm(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addImm(CI->getZExtValue())
- .addReg(0U)
+ .addImm(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
} else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addFPImm(CF)
- .addReg(0U)
+ .addImm(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
- } else if (unsigned Reg = lookUpRegForValue(V)) {
+ } else if (Register Reg = lookUpRegForValue(V)) {
// FIXME: This does not handle register-indirect values at offset 0.
bool IsIndirect = false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg,
DI->getVariable(), DI->getExpression());
} else {
- // We can't yet handle anything else here because it would require
- // generating code, thus altering codegen because of debug info.
+ // We don't know how to handle other cases, so we drop.
LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
}
return true;
@@ -1469,7 +1493,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
case Intrinsic::expect: {
- unsigned ResultReg = getRegForValue(II->getArgOperand(0));
+ Register ResultReg = getRegForValue(II->getArgOperand(0));
if (!ResultReg)
return false;
updateValueMap(II, ResultReg);
@@ -1507,14 +1531,14 @@ bool FastISel::selectCast(const User *I, unsigned Opcode) {
if (!TLI.isTypeLegal(SrcVT))
return false;
- unsigned InputReg = getRegForValue(I->getOperand(0));
+ Register InputReg = getRegForValue(I->getOperand(0));
if (!InputReg)
// Unhandled operand. Halt "fast" selection and bail.
return false;
bool InputRegIsKill = hasTrivialKill(I->getOperand(0));
- unsigned ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
+ Register ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
Opcode, InputReg, InputRegIsKill);
if (!ResultReg)
return false;
@@ -1526,7 +1550,7 @@ bool FastISel::selectCast(const User *I, unsigned Opcode) {
bool FastISel::selectBitCast(const User *I) {
// If the bitcast doesn't change the type, just use the operand value.
if (I->getType() == I->getOperand(0)->getType()) {
- unsigned Reg = getRegForValue(I->getOperand(0));
+ Register Reg = getRegForValue(I->getOperand(0));
if (!Reg)
return false;
updateValueMap(I, Reg);
@@ -1543,13 +1567,13 @@ bool FastISel::selectBitCast(const User *I) {
MVT SrcVT = SrcEVT.getSimpleVT();
MVT DstVT = DstEVT.getSimpleVT();
- unsigned Op0 = getRegForValue(I->getOperand(0));
+ Register Op0 = getRegForValue(I->getOperand(0));
if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
return false;
bool Op0IsKill = hasTrivialKill(I->getOperand(0));
// First, try to perform the bitcast by inserting a reg-reg copy.
- unsigned ResultReg = 0;
+ Register ResultReg;
if (SrcVT == DstVT) {
const TargetRegisterClass *SrcClass = TLI.getRegClassFor(SrcVT);
const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT);
@@ -1572,6 +1596,27 @@ bool FastISel::selectBitCast(const User *I) {
return true;
}
+bool FastISel::selectFreeze(const User *I) {
+ Register Reg = getRegForValue(I->getOperand(0));
+ if (!Reg)
+ // Unhandled operand.
+ return false;
+
+ EVT ETy = TLI.getValueType(DL, I->getOperand(0)->getType());
+ if (ETy == MVT::Other || !TLI.isTypeLegal(ETy))
+ // Unhandled type, bail out.
+ return false;
+
+ MVT Ty = ETy.getSimpleVT();
+ const TargetRegisterClass *TyRegClass = TLI.getRegClassFor(Ty);
+ Register ResultReg = createResultReg(TyRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(Reg);
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
// Remove local value instructions starting from the instruction after
// SavedLastLocalValue to the current function insert point.
void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue)
@@ -1607,9 +1652,9 @@ bool FastISel::selectInstruction(const Instruction *I) {
}
// FastISel does not handle any operand bundles except OB_funclet.
- if (ImmutableCallSite CS = ImmutableCallSite(I))
- for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i)
- if (CS.getOperandBundleAt(i).getTagID() != LLVMContext::OB_funclet)
+ if (auto *Call = dyn_cast<CallBase>(I))
+ for (unsigned i = 0, e = Call->getNumOperandBundles(); i != e; ++i)
+ if (Call->getOperandBundleAt(i).getTagID() != LLVMContext::OB_funclet)
return false;
DbgLoc = I->getDebugLoc();
@@ -1710,14 +1755,14 @@ void FastISel::finishCondBranch(const BasicBlock *BranchBB,
/// Emit an FNeg operation.
bool FastISel::selectFNeg(const User *I, const Value *In) {
- unsigned OpReg = getRegForValue(In);
+ Register OpReg = getRegForValue(In);
if (!OpReg)
return false;
bool OpRegIsKill = hasTrivialKill(In);
// If the target has ISD::FNEG, use it.
EVT VT = TLI.getValueType(DL, I->getType());
- unsigned ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG,
+ Register ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG,
OpReg, OpRegIsKill);
if (ResultReg) {
updateValueMap(I, ResultReg);
@@ -1732,12 +1777,12 @@ bool FastISel::selectFNeg(const User *I, const Value *In) {
if (!TLI.isTypeLegal(IntVT))
return false;
- unsigned IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
+ Register IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
ISD::BITCAST, OpReg, OpRegIsKill);
if (!IntReg)
return false;
- unsigned IntResultReg = fastEmit_ri_(
+ Register IntResultReg = fastEmit_ri_(
IntVT.getSimpleVT(), ISD::XOR, IntReg, /*IsKill=*/true,
UINT64_C(1) << (VT.getSizeInBits() - 1), IntVT.getSimpleVT());
if (!IntResultReg)
@@ -1771,7 +1816,7 @@ bool FastISel::selectExtractValue(const User *U) {
// Get the base result register.
unsigned ResultReg;
- DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(Op0);
+ DenseMap<const Value *, Register>::iterator I = FuncInfo.ValueMap.find(Op0);
if (I != FuncInfo.ValueMap.end())
ResultReg = I->second;
else if (isa<Instruction>(Op0))
@@ -1903,7 +1948,7 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) {
return selectCast(I, ISD::ZERO_EXTEND);
if (DstVT.bitsLT(SrcVT))
return selectCast(I, ISD::TRUNCATE);
- unsigned Reg = getRegForValue(I->getOperand(0));
+ Register Reg = getRegForValue(I->getOperand(0));
if (!Reg)
return false;
updateValueMap(I, Reg);
@@ -1913,6 +1958,9 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) {
case Instruction::ExtractValue:
return selectExtractValue(I);
+ case Instruction::Freeze:
+ return selectFreeze(I);
+
case Instruction::PHI:
llvm_unreachable("FastISel shouldn't visit PHI nodes!");
@@ -1975,7 +2023,7 @@ unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/,
/// instruction with an immediate operand using fastEmit_ri.
/// If that fails, it materializes the immediate into a register and tries
/// fastEmit_rr instead.
-unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0,
+Register FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0,
bool Op0IsKill, uint64_t Imm, MVT ImmType) {
// If this is a multiply by a power of two, emit this as a shift left.
if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
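
For intuition, a small self-contained sketch (illustrative only; __builtin_ctzll is used here as a stand-in for LLVM's Log2_64 helper) of the identity this branch relies on: multiplying by a power of two is the same as shifting left by its base-2 logarithm.

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Imm = 8;                       // power-of-two immediate
  unsigned ShAmt = __builtin_ctzll(Imm);  // 3, what Log2_64(Imm) would return
  uint64_t X = 5;
  std::printf("%llu * %llu = %llu, %llu << %u = %llu\n",
              (unsigned long long)X, (unsigned long long)Imm,
              (unsigned long long)(X * Imm), (unsigned long long)X, ShAmt,
              (unsigned long long)(X << ShAmt)); // both evaluate to 40
  return 0;
}
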
@@ -1994,10 +2042,10 @@ unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0,
return 0;
// First check if immediate type is legal. If not, we can't use the ri form.
- unsigned ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
+ Register ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
if (ResultReg)
return ResultReg;
- unsigned MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+ Register MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
bool IsImmKill = true;
if (!MaterialReg) {
// This is a bit ugly/slow, but failing here means falling out of
@@ -2018,19 +2066,19 @@ unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0,
return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, IsImmKill);
}
-unsigned FastISel::createResultReg(const TargetRegisterClass *RC) {
+Register FastISel::createResultReg(const TargetRegisterClass *RC) {
return MRI.createVirtualRegister(RC);
}
-unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op,
+Register FastISel::constrainOperandRegClass(const MCInstrDesc &II, Register Op,
unsigned OpNum) {
- if (Register::isVirtualRegister(Op)) {
+ if (Op.isVirtual()) {
const TargetRegisterClass *RegClass =
TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF);
if (!MRI.constrainRegClass(Op, RegClass)) {
// If it's not legal to COPY between the register classes, something
// has gone very wrong before we got here.
- unsigned NewOp = createResultReg(RegClass);
+ Register NewOp = createResultReg(RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), NewOp).addReg(Op);
return NewOp;
@@ -2039,21 +2087,21 @@ unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op,
return Op;
}
-unsigned FastISel::fastEmitInst_(unsigned MachineInstOpcode,
+Register FastISel::fastEmitInst_(unsigned MachineInstOpcode,
const TargetRegisterClass *RC) {
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg);
return ResultReg;
}
-unsigned FastISel::fastEmitInst_r(unsigned MachineInstOpcode,
+Register FastISel::fastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
bool Op0IsKill) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
if (II.getNumDefs() >= 1)
@@ -2069,13 +2117,13 @@ unsigned FastISel::fastEmitInst_r(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
+Register FastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
bool Op0IsKill, unsigned Op1,
bool Op1IsKill) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
@@ -2093,14 +2141,14 @@ unsigned FastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
+Register FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
bool Op0IsKill, unsigned Op1,
bool Op1IsKill, unsigned Op2,
bool Op2IsKill) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
@@ -2121,12 +2169,12 @@ unsigned FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
+Register FastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
bool Op0IsKill, uint64_t Imm) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
if (II.getNumDefs() >= 1)
@@ -2143,13 +2191,13 @@ unsigned FastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode,
+Register FastISel::fastEmitInst_rii(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
bool Op0IsKill, uint64_t Imm1,
uint64_t Imm2) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
if (II.getNumDefs() >= 1)
@@ -2168,12 +2216,12 @@ unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_f(unsigned MachineInstOpcode,
+Register FastISel::fastEmitInst_f(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
const ConstantFP *FPImm) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
@@ -2187,13 +2235,13 @@ unsigned FastISel::fastEmitInst_f(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
+Register FastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, unsigned Op0,
bool Op0IsKill, unsigned Op1,
bool Op1IsKill, uint64_t Imm) {
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
@@ -2213,9 +2261,9 @@ unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
+Register FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC, uint64_t Imm) {
- unsigned ResultReg = createResultReg(RC);
+ Register ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
@@ -2229,9 +2277,9 @@ unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0,
+Register FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0,
bool Op0IsKill, uint32_t Idx) {
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ Register ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
assert(Register::isVirtualRegister(Op0) &&
"Cannot yet extract from physregs");
const TargetRegisterClass *RC = MRI.getRegClass(Op0);
@@ -2243,7 +2291,7 @@ unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0,
/// Emit MachineInstrs to compute the value of Op with all but the least
/// significant bit set to zero.
-unsigned FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
+Register FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
return fastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1);
}
@@ -2305,7 +2353,7 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
if (const auto *Inst = dyn_cast<Instruction>(PHIOp))
DbgLoc = Inst->getDebugLoc();
- unsigned Reg = getRegForValue(PHIOp);
+ Register Reg = getRegForValue(PHIOp);
if (!Reg) {
FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
return false;
@@ -2351,7 +2399,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
// Figure out which vreg this is going into. If there is no assigned vreg yet
// then there actually was no reference to it. Perhaps the load is referenced
// by a dead instruction.
- unsigned LoadReg = getRegForValue(LI);
+ Register LoadReg = getRegForValue(LI);
if (!LoadReg)
return false;
@@ -2394,18 +2442,18 @@ MachineMemOperand *
FastISel::createMachineMemOperandFor(const Instruction *I) const {
const Value *Ptr;
Type *ValTy;
- unsigned Alignment;
+ MaybeAlign Alignment;
MachineMemOperand::Flags Flags;
bool IsVolatile;
if (const auto *LI = dyn_cast<LoadInst>(I)) {
- Alignment = LI->getAlignment();
+ Alignment = LI->getAlign();
IsVolatile = LI->isVolatile();
Flags = MachineMemOperand::MOLoad;
Ptr = LI->getPointerOperand();
ValTy = LI->getType();
} else if (const auto *SI = dyn_cast<StoreInst>(I)) {
- Alignment = SI->getAlignment();
+ Alignment = SI->getAlign();
IsVolatile = SI->isVolatile();
Flags = MachineMemOperand::MOStore;
Ptr = SI->getPointerOperand();
@@ -2421,8 +2469,8 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {
AAMDNodes AAInfo;
I->getAAMetadata(AAInfo);
- if (Alignment == 0) // Ensure that codegen never sees alignment 0.
- Alignment = DL.getABITypeAlignment(ValTy);
+ if (!Alignment) // Ensure that codegen never sees alignment 0.
+ Alignment = DL.getABITypeAlign(ValTy);
unsigned Size = DL.getTypeStoreSize(ValTy);
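
A tiny standalone sketch (illustrative only; std::optional stands in for LLVM's MaybeAlign, and the numeric ABI alignment is assumed) of the fallback pattern in this hunk: when the IR carried no alignment, the ABI alignment of the type is substituted so the memory operand never ends up with alignment 0.

#include <cstdio>
#include <optional>

int main() {
  std::optional<unsigned> Alignment;  // unset, like a load/store with no align
  unsigned ABITypeAlign = 4;          // stand-in for DL.getABITypeAlign(ValTy)
  if (!Alignment)                     // ensure codegen never sees alignment 0
    Alignment = ABITypeAlign;
  std::printf("alignment recorded on the memory operand: %u\n", *Alignment); // 4
  return 0;
}
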
@@ -2436,7 +2484,7 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {
Flags |= MachineMemOperand::MOInvariant;
return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size,
- Alignment, AAInfo, Ranges);
+ *Alignment, AAInfo, Ranges);
}
CmpInst::Predicate FastISel::optimizeCmpPredicate(const CmpInst *CI) const {
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index fa33400cd4b3f..5cf83cff3a903 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -85,7 +86,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
- unsigned StackAlign = TFI->getStackAlignment();
DA = DAG->getDivergenceAnalysis();
// Check whether the function can return without sret-demotion.
@@ -130,19 +130,31 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Initialize the mapping of values to registers. This is only set up for
// instruction values that are used outside of the block that defines
// them.
+ const Align StackAlign = TFI->getStackAlign();
for (const BasicBlock &BB : *Fn) {
for (const Instruction &I : BB) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
Type *Ty = AI->getAllocatedType();
- unsigned Align =
- std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty),
- AI->getAlignment());
+ Align TyPrefAlign = MF->getDataLayout().getPrefTypeAlign(Ty);
+ // The "specified" alignment is the alignment written on the alloca,
+ // or the preferred alignment of the type if none is specified.
+ //
+ // (Unspecified alignment on allocas will be going away soon.)
+ Align SpecifiedAlign = AI->getAlign();
+
+ // If the preferred alignment of the type is higher than the specified
+ // alignment of the alloca, promote the alignment, as long as it doesn't
+ // require realigning the stack.
+ //
+ // FIXME: Do we really want to second-guess the IR in isel?
+ Align Alignment =
+ std::max(std::min(TyPrefAlign, StackAlign), SpecifiedAlign);
// Static allocas can be folded into the initial stack frame
// adjustment. For targets that don't realign the stack, don't
// do this if there is an extra alignment requirement.
if (AI->isStaticAlloca() &&
- (TFI->isStackRealignable() || (Align <= StackAlign))) {
+ (TFI->isStackRealignable() || (Alignment <= StackAlign))) {
const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize());
uint64_t TySize =
MF->getDataLayout().getTypeAllocSize(Ty).getKnownMinSize();
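
To make the alloca-alignment clamp a few lines up concrete, here is a minimal standalone sketch (illustrative only; the numeric values are assumed) of max(min(TyPrefAlign, StackAlign), SpecifiedAlign): the alloca is promoted toward the type's preferred alignment, but never past the stack alignment unless the IR itself asked for more.

#include <algorithm>
#include <cstdio>

int main() {
  unsigned TyPrefAlign = 16;    // preferred alignment of the allocated type
  unsigned StackAlign = 8;      // target stack alignment
  unsigned SpecifiedAlign = 4;  // alignment written on the alloca
  unsigned Alignment =
      std::max(std::min(TyPrefAlign, StackAlign), SpecifiedAlign);
  std::printf("alignment used for the frame object: %u\n", Alignment); // 8
  return 0;
}
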
@@ -154,15 +166,15 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
if (Iter != CatchObjects.end() && TLI->needsFixedCatchObjects()) {
FrameIndex = MF->getFrameInfo().CreateFixedObject(
TySize, 0, /*IsImmutable=*/false, /*isAliased=*/true);
- MF->getFrameInfo().setObjectAlignment(FrameIndex, Align);
+ MF->getFrameInfo().setObjectAlignment(FrameIndex, Alignment);
} else {
- FrameIndex =
- MF->getFrameInfo().CreateStackObject(TySize, Align, false, AI);
+ FrameIndex = MF->getFrameInfo().CreateStackObject(TySize, Alignment,
+ false, AI);
}
// Scalable vectors may need a special StackID to distinguish
// them from other (fixed size) stack objects.
- if (Ty->isVectorTy() && Ty->getVectorIsScalable())
+ if (isa<ScalableVectorType>(Ty))
MF->getFrameInfo().setStackID(FrameIndex,
TFI->getStackIDForScalableVectors());
@@ -176,21 +188,20 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// FIXME: Overaligned static allocas should be grouped into
// a single dynamic allocation instead of using a separate
// stack allocation for each one.
- if (Align <= StackAlign)
- Align = 0;
// Inform the Frame Information that we have variable-sized objects.
- MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, AI);
+ MF->getFrameInfo().CreateVariableSizedObject(
+ Alignment <= StackAlign ? Align(1) : Alignment, AI);
}
}
// Look for inline asm that clobbers the SP register.
- if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
- ImmutableCallSite CS(&I);
- if (isa<InlineAsm>(CS.getCalledValue())) {
+ if (auto *Call = dyn_cast<CallBase>(&I)) {
+ if (Call->isInlineAsm()) {
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
std::vector<TargetLowering::AsmOperandInfo> Ops =
- TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI, CS);
+ TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI,
+ *Call);
for (TargetLowering::AsmOperandInfo &Op : Ops) {
if (Op.Type == InlineAsm::isClobber) {
// Clobbers don't have SDValue operands, hence SDValue().
@@ -354,7 +365,7 @@ void FunctionLoweringInfo::clear() {
}
/// CreateReg - Allocate a single virtual register for the given type.
-unsigned FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) {
+Register FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) {
return RegInfo->createVirtualRegister(
MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent));
}
@@ -366,29 +377,29 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) {
/// In the case that the given value has struct or array type, this function
/// will assign registers for each member or element.
///
-unsigned FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) {
+Register FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) {
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs);
- unsigned FirstReg = 0;
+ Register FirstReg;
for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
EVT ValueVT = ValueVTs[Value];
MVT RegisterVT = TLI->getRegisterType(Ty->getContext(), ValueVT);
unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT);
for (unsigned i = 0; i != NumRegs; ++i) {
- unsigned R = CreateReg(RegisterVT, isDivergent);
+ Register R = CreateReg(RegisterVT, isDivergent);
if (!FirstReg) FirstReg = R;
}
}
return FirstReg;
}
-unsigned FunctionLoweringInfo::CreateRegs(const Value *V) {
- return CreateRegs(V->getType(), DA && !TLI->requiresUniformRegister(*MF, V) &&
- DA->isDivergent(V));
+Register FunctionLoweringInfo::CreateRegs(const Value *V) {
+ return CreateRegs(V->getType(), DA && DA->isDivergent(V) &&
+ !TLI->requiresUniformRegister(*MF, V));
}
/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
@@ -397,7 +408,7 @@ unsigned FunctionLoweringInfo::CreateRegs(const Value *V) {
/// the larger bit width by zero extension. The bit width must be no smaller
/// than the LiveOutInfo's existing bit width.
const FunctionLoweringInfo::LiveOutInfo *
-FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) {
+FunctionLoweringInfo::GetLiveOutRegInfo(Register Reg, unsigned BitWidth) {
if (!LiveOutRegInfo.inBounds(Reg))
return nullptr;
@@ -407,7 +418,7 @@ FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) {
if (BitWidth > LOI->Known.getBitWidth()) {
LOI->NumSignBits = 1;
- LOI->Known = LOI->Known.zext(BitWidth, false /* => any extend */);
+ LOI->Known = LOI->Known.anyext(BitWidth);
}
return LOI;
@@ -431,7 +442,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
IntVT = TLI->getTypeToTransformTo(PN->getContext(), IntVT);
unsigned BitWidth = IntVT.getSizeInBits();
- unsigned DestReg = ValueMap[PN];
+ Register DestReg = ValueMap[PN];
if (!Register::isVirtualRegister(DestReg))
return;
LiveOutRegInfo.grow(DestReg);
@@ -452,7 +463,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
} else {
assert(ValueMap.count(V) && "V should have been placed in ValueMap when its"
"CopyToReg node was created.");
- unsigned SrcReg = ValueMap[V];
+ Register SrcReg = ValueMap[V];
if (!Register::isVirtualRegister(SrcReg)) {
DestLOI.IsValid = false;
return;
@@ -487,8 +498,8 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
"its CopyToReg node was created.");
- unsigned SrcReg = ValueMap[V];
- if (!Register::isVirtualRegister(SrcReg)) {
+ Register SrcReg = ValueMap[V];
+ if (!SrcReg.isVirtual()) {
DestLOI.IsValid = false;
return;
}
@@ -522,11 +533,11 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
return INT_MAX;
}
-unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg(
+Register FunctionLoweringInfo::getCatchPadExceptionPointerVReg(
const Value *CPI, const TargetRegisterClass *RC) {
MachineRegisterInfo &MRI = MF->getRegInfo();
auto I = CatchPadExceptionPointers.insert({CPI, 0});
- unsigned &VReg = I.first->second;
+ Register &VReg = I.first->second;
if (I.second)
VReg = MRI.createVirtualRegister(RC);
assert(VReg && "null vreg in exception pointer table!");
@@ -534,7 +545,7 @@ unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg(
}
const Value *
-FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) {
+FunctionLoweringInfo::getValueFromVirtualReg(Register Vreg) {
if (VirtReg2Value.empty()) {
SmallVector<EVT, 4> ValueVTs;
for (auto &P : ValueMap) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index c613c25406285..0e4e99214aa24 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -28,6 +29,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#define DEBUG_TYPE "instr-emitter"
@@ -84,9 +86,9 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
/// implicit physical register output.
void InstrEmitter::
EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
- unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
- unsigned VRBase = 0;
- if (Register::isVirtualRegister(SrcReg)) {
+ Register SrcReg, DenseMap<SDValue, Register> &VRBaseMap) {
+ Register VRBase;
+ if (SrcReg.isVirtual()) {
// Just use the input register directly!
SDValue Op(Node, ResNo);
if (IsClone)
@@ -113,8 +115,8 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
if (User->getOpcode() == ISD::CopyToReg &&
User->getOperand(2).getNode() == Node &&
User->getOperand(2).getResNo() == ResNo) {
- unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (Register::isVirtualRegister(DestReg)) {
+ Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (DestReg.isVirtual()) {
VRBase = DestReg;
Match = false;
} else if (DestReg != SrcReg)
@@ -190,16 +192,19 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
MachineInstrBuilder &MIB,
const MCInstrDesc &II,
bool IsClone, bool IsCloned,
- DenseMap<SDValue, unsigned> &VRBaseMap) {
+ DenseMap<SDValue, Register> &VRBaseMap) {
assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF &&
"IMPLICIT_DEF should have been handled as a special case elsewhere!");
unsigned NumResults = CountResults(Node);
- for (unsigned i = 0; i < II.getNumDefs(); ++i) {
+ bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() &&
+ II.isVariadic() && II.variadicOpsAreDefs();
+ unsigned NumVRegs = HasVRegVariadicDefs ? NumResults : II.getNumDefs();
+ for (unsigned i = 0; i < NumVRegs; ++i) {
// If the specific node value is only used by a CopyToReg and the dest reg
// is a vreg in the same register class, use the CopyToReg'd destination
// register instead of creating a new vreg.
- unsigned VRBase = 0;
+ Register VRBase;
const TargetRegisterClass *RC =
TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF));
// Always let the value type influence the used register class. The
@@ -216,10 +221,10 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
RC = VTRC;
}
- if (II.OpInfo[i].isOptionalDef()) {
+ if (II.OpInfo != nullptr && II.OpInfo[i].isOptionalDef()) {
// Optional def must be a physical register.
VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
- assert(Register::isPhysicalRegister(VRBase));
+ assert(VRBase.isPhysical());
MIB.addReg(VRBase, RegState::Define);
}
@@ -263,8 +268,8 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
/// getVR - Return the virtual register corresponding to the specified result
/// of the specified node.
-unsigned InstrEmitter::getVR(SDValue Op,
- DenseMap<SDValue, unsigned> &VRBaseMap) {
+Register InstrEmitter::getVR(SDValue Op,
+ DenseMap<SDValue, Register> &VRBaseMap) {
if (Op.isMachineOpcode() &&
Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
// Add an IMPLICIT_DEF instruction before every use.
@@ -278,7 +283,7 @@ unsigned InstrEmitter::getVR(SDValue Op,
return VReg;
}
- DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op);
assert(I != VRBaseMap.end() && "Node emitted out of order - late");
return I->second;
}
@@ -292,13 +297,13 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
SDValue Op,
unsigned IIOpNum,
const MCInstrDesc *II,
- DenseMap<SDValue, unsigned> &VRBaseMap,
+ DenseMap<SDValue, Register> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned) {
assert(Op.getValueType() != MVT::Other &&
Op.getValueType() != MVT::Glue &&
"Chain and glue operands should occur at end of operand list!");
// Get/emit the operand.
- unsigned VReg = getVR(Op, VRBaseMap);
+ Register VReg = getVR(Op, VRBaseMap);
const MCInstrDesc &MCID = MIB->getDesc();
bool isOptDef = IIOpNum < MCID.getNumOperands() &&
@@ -363,7 +368,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
SDValue Op,
unsigned IIOpNum,
const MCInstrDesc *II,
- DenseMap<SDValue, unsigned> &VRBaseMap,
+ DenseMap<SDValue, Register> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned) {
if (Op.isMachineOpcode()) {
AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap,
@@ -373,7 +378,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
} else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
MIB.addFPImm(F->getConstantFPValue());
} else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
- unsigned VReg = R->getReg();
+ Register VReg = R->getReg();
MVT OpVT = Op.getSimpleValueType();
const TargetRegisterClass *IIRC =
II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF))
@@ -409,23 +414,14 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
MIB.addJumpTableIndex(JT->getIndex(), JT->getTargetFlags());
} else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
int Offset = CP->getOffset();
- unsigned Align = CP->getAlignment();
- Type *Type = CP->getType();
- // MachineConstantPool wants an explicit alignment.
- if (Align == 0) {
- Align = MF->getDataLayout().getPrefTypeAlignment(Type);
- if (Align == 0) {
- // Alignment of vector types. FIXME!
- Align = MF->getDataLayout().getTypeAllocSize(Type);
- }
- }
+ Align Alignment = CP->getAlign();
unsigned Idx;
MachineConstantPool *MCP = MF->getConstantPool();
if (CP->isMachineConstantPoolEntry())
- Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align);
+ Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Alignment);
else
- Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align);
+ Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Alignment);
MIB.addConstantPoolIndex(Idx, Offset, CP->getTargetFlags());
} else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
MIB.addExternalSymbol(ES->getSymbol(), ES->getTargetFlags());
@@ -446,7 +442,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
}
}
-unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
+Register InstrEmitter::ConstrainForSubReg(Register VReg, unsigned SubIdx,
MVT VT, bool isDivergent, const DebugLoc &DL) {
const TargetRegisterClass *VRC = MRI->getRegClass(VReg);
const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx);
@@ -473,9 +469,9 @@ unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
/// EmitSubregNode - Generate machine code for subreg nodes.
///
void InstrEmitter::EmitSubregNode(SDNode *Node,
- DenseMap<SDValue, unsigned> &VRBaseMap,
+ DenseMap<SDValue, Register> &VRBaseMap,
bool IsClone, bool IsCloned) {
- unsigned VRBase = 0;
+ Register VRBase;
unsigned Opc = Node->getMachineOpcode();
// If the node is only used by a CopyToReg and the dest reg is a vreg, use
@@ -483,8 +479,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
for (SDNode *User : Node->uses()) {
if (User->getOpcode() == ISD::CopyToReg &&
User->getOperand(2).getNode() == Node) {
- unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (Register::isVirtualRegister(DestReg)) {
+ Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (DestReg.isVirtual()) {
VRBase = DestReg;
break;
}
@@ -499,7 +495,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
const TargetRegisterClass *TRC =
TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
- unsigned Reg;
+ Register Reg;
MachineInstr *DefMI;
RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0));
if (R && Register::isPhysicalRegister(R->getReg())) {
@@ -510,7 +506,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
DefMI = MRI->getVRegDef(Reg);
}
- unsigned SrcReg, DstReg, DefSubIdx;
+ Register SrcReg, DstReg;
+ unsigned DefSubIdx;
if (DefMI &&
TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
SubIdx == DefSubIdx &&
@@ -528,19 +525,19 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
// Reg may not support a SubIdx sub-register, and we may need to
// constrain its register class or issue a COPY to a compatible register
// class.
- if (Register::isVirtualRegister(Reg))
+ if (Reg.isVirtual())
Reg = ConstrainForSubReg(Reg, SubIdx,
Node->getOperand(0).getSimpleValueType(),
Node->isDivergent(), Node->getDebugLoc());
// Create the destreg if it is missing.
- if (VRBase == 0)
+ if (!VRBase)
VRBase = MRI->createVirtualRegister(TRC);
// Create the extract_subreg machine instruction.
MachineInstrBuilder CopyMI =
BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
TII->get(TargetOpcode::COPY), VRBase);
- if (Register::isVirtualRegister(Reg))
+ if (Reg.isVirtual())
CopyMI.addReg(Reg, 0, SubIdx);
else
CopyMI.addReg(TRI->getSubReg(Reg, SubIdx));
@@ -606,7 +603,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
///
void
InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
- DenseMap<SDValue, unsigned> &VRBaseMap) {
+ DenseMap<SDValue, Register> &VRBaseMap) {
unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
// Create the new VReg in the destination class and emit a copy.
@@ -626,7 +623,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
///
void InstrEmitter::EmitRegSequence(SDNode *Node,
- DenseMap<SDValue, unsigned> &VRBaseMap,
+ DenseMap<SDValue, Register> &VRBaseMap,
bool IsClone, bool IsCloned) {
unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
@@ -675,9 +672,9 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
///
MachineInstr *
InstrEmitter::EmitDbgValue(SDDbgValue *SD,
- DenseMap<SDValue, unsigned> &VRBaseMap) {
+ DenseMap<SDValue, Register> &VRBaseMap) {
MDNode *Var = SD->getVariable();
- const DIExpression *Expr = SD->getExpression();
+ MDNode *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
@@ -701,11 +698,12 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
// EmitTargetCodeForFrameDebugValue is responsible for allocation.
auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE))
.addFrameIndex(SD->getFrameIx());
-
if (SD->isIndirect())
- Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
-
- FrameMI.addReg(0);
+ // Push [fi + 0] onto the DIExpression stack.
+ FrameMI.addImm(0);
+ else
+ // Push fi onto the DIExpression stack.
+ FrameMI.addReg(0);
return FrameMI.addMetadata(Var).addMetadata(Expr);
}
// Otherwise, we're going to create an instruction here.
@@ -719,7 +717,7 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
// they happen and transfer the debug info, but trying to guarantee that
// in all cases would be very fragile; this is a safeguard for any
// that were missed.
- DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op);
if (I==VRBaseMap.end())
MIB.addReg(0U); // undef
else
@@ -751,9 +749,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
// Indirect addressing is indicated by an Imm as the second parameter.
if (SD->isIndirect())
- Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
-
- MIB.addReg(0U, RegState::Debug);
+ MIB.addImm(0U);
+ else
+ MIB.addReg(0U, RegState::Debug);
MIB.addMetadata(Var);
MIB.addMetadata(Expr);
@@ -780,7 +778,7 @@ InstrEmitter::EmitDbgLabel(SDDbgLabel *SD) {
///
void InstrEmitter::
EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
- DenseMap<SDValue, unsigned> &VRBaseMap) {
+ DenseMap<SDValue, Register> &VRBaseMap) {
unsigned Opc = Node->getMachineOpcode();
// Handle subreg insert/extract specially
@@ -828,7 +826,10 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
unsigned NumImpUses = 0;
unsigned NodeOperands =
countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses);
- bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=nullptr;
+ bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() &&
+ II.isVariadic() && II.variadicOpsAreDefs();
+ bool HasPhysRegOuts = NumResults > NumDefs &&
+ II.getImplicitDefs() != nullptr && !HasVRegVariadicDefs;
#ifndef NDEBUG
unsigned NumMIOperands = NodeOperands + NumResults;
if (II.isVariadic())
@@ -978,7 +979,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
/// needed dependencies.
void InstrEmitter::
EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
- DenseMap<SDValue, unsigned> &VRBaseMap) {
+ DenseMap<SDValue, Register> &VRBaseMap) {
switch (Node->getOpcode()) {
default:
#ifndef NDEBUG
@@ -991,7 +992,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
case ISD::TokenFactor: // fall thru
break;
case ISD::CopyToReg: {
- unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ Register DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
SDValue SrcVal = Node->getOperand(2);
if (Register::isVirtualRegister(DestReg) && SrcVal.isMachineOpcode() &&
SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
@@ -1001,7 +1002,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
break;
}
- unsigned SrcReg;
+ Register SrcReg;
if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
SrcReg = R->getReg();
else
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
index cfe99dd977b5b..c3567eae91619 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -17,13 +17,15 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
namespace llvm {
class MachineInstrBuilder;
class MCInstrDesc;
+class SDDbgLabel;
class SDDbgValue;
+class TargetLowering;
class LLVM_LIBRARY_VISIBILITY InstrEmitter {
MachineFunction *MF;
@@ -39,19 +41,19 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
/// implicit physical register output.
void EmitCopyFromReg(SDNode *Node, unsigned ResNo,
bool IsClone, bool IsCloned,
- unsigned SrcReg,
- DenseMap<SDValue, unsigned> &VRBaseMap);
+ Register SrcReg,
+ DenseMap<SDValue, Register> &VRBaseMap);
void CreateVirtualRegisters(SDNode *Node,
MachineInstrBuilder &MIB,
const MCInstrDesc &II,
bool IsClone, bool IsCloned,
- DenseMap<SDValue, unsigned> &VRBaseMap);
+ DenseMap<SDValue, Register> &VRBaseMap);
/// getVR - Return the virtual register corresponding to the specified result
/// of the specified node.
- unsigned getVR(SDValue Op,
- DenseMap<SDValue, unsigned> &VRBaseMap);
+ Register getVR(SDValue Op,
+ DenseMap<SDValue, Register> &VRBaseMap);
/// AddRegisterOperand - Add the specified register as an operand to the
/// specified machine instr. Insert register copies if the register is
@@ -60,7 +62,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
SDValue Op,
unsigned IIOpNum,
const MCInstrDesc *II,
- DenseMap<SDValue, unsigned> &VRBaseMap,
+ DenseMap<SDValue, Register> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned);
/// AddOperand - Add the specified operand to the specified machine instr. II
@@ -71,18 +73,18 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
SDValue Op,
unsigned IIOpNum,
const MCInstrDesc *II,
- DenseMap<SDValue, unsigned> &VRBaseMap,
+ DenseMap<SDValue, Register> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned);
/// ConstrainForSubReg - Try to constrain VReg to a register class that
/// supports SubIdx sub-registers. Emit a copy if that isn't possible.
/// Return the virtual register to use.
- unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, MVT VT,
+ Register ConstrainForSubReg(Register VReg, unsigned SubIdx, MVT VT,
bool isDivergent, const DebugLoc &DL);
/// EmitSubregNode - Generate machine code for subreg nodes.
///
- void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+ void EmitSubregNode(SDNode *Node, DenseMap<SDValue, Register> &VRBaseMap,
bool IsClone, bool IsCloned);
/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
@@ -90,11 +92,11 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
/// register is constrained to be in a particular register class.
///
void EmitCopyToRegClassNode(SDNode *Node,
- DenseMap<SDValue, unsigned> &VRBaseMap);
+ DenseMap<SDValue, Register> &VRBaseMap);
/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
///
- void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+ void EmitRegSequence(SDNode *Node, DenseMap<SDValue, Register> &VRBaseMap,
bool IsClone, bool IsCloned);
public:
/// CountResults - The results of target nodes have register or immediate
@@ -105,7 +107,7 @@ public:
/// EmitDbgValue - Generate machine instruction for a dbg_value node.
///
MachineInstr *EmitDbgValue(SDDbgValue *SD,
- DenseMap<SDValue, unsigned> &VRBaseMap);
+ DenseMap<SDValue, Register> &VRBaseMap);
/// Generate machine instruction for a dbg_label node.
MachineInstr *EmitDbgLabel(SDDbgLabel *SD);
@@ -113,7 +115,7 @@ public:
/// EmitNode - Generate machine code for a node and needed dependencies.
///
void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
- DenseMap<SDValue, unsigned> &VRBaseMap) {
+ DenseMap<SDValue, Register> &VRBaseMap) {
if (Node->isMachineOpcode())
EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap);
else
@@ -132,9 +134,9 @@ public:
private:
void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
- DenseMap<SDValue, unsigned> &VRBaseMap);
+ DenseMap<SDValue, Register> &VRBaseMap);
void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
- DenseMap<SDValue, unsigned> &VRBaseMap);
+ DenseMap<SDValue, Register> &VRBaseMap);
};
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 80ac8b95e4ef4..6a6004c158bb8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -328,7 +328,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
SDValue CPIdx =
DAG.getConstantPool(LLVMC, TLI.getPointerTy(DAG.getDataLayout()));
- unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
if (Extend) {
SDValue Result = DAG.getExtLoad(
ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx,
@@ -348,7 +348,7 @@ SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) {
EVT VT = CP->getValueType(0);
SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(),
TLI.getPointerTy(DAG.getDataLayout()));
- unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
SDValue Result = DAG.getLoad(
VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment);
@@ -387,7 +387,9 @@ SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec,
SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, Tmp3);
// Store the scalar value.
- Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT);
+ Ch = DAG.getTruncStore(
+ Ch, dl, Tmp2, StackPtr2,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT);
// Load the updated vector.
return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(
DAG.getMachineFunction(), SPFI));
@@ -434,7 +436,6 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
// We generally can't do this one for long doubles.
SDValue Chain = ST->getChain();
SDValue Ptr = ST->getBasePtr();
- unsigned Alignment = ST->getAlignment();
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
SDLoc dl(ST);
@@ -444,8 +445,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
SDValue Con = DAG.getConstant(CFP->getValueAPF().
bitcastToAPInt().zextOrTrunc(32),
SDLoc(CFP), MVT::i32);
- return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), Alignment,
- MMOFlags, AAInfo);
+ return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
}
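
For reference, a small standalone sketch (illustrative only) of the bit pattern the f32 branch above stores: the constant's raw bits, which is what bitcastToAPInt().zextOrTrunc(32) produces, are written out as an i32 instead of a floating-point store.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  float F = 1.0f;
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof Bits);  // same bits, reinterpreted as an integer
  std::printf("f32 %f stored as i32 0x%08x\n", F, (unsigned)Bits); // 0x3f800000
  return 0;
}
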
if (CFP->getValueType(0) == MVT::f64) {
@@ -454,7 +455,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
zextOrTrunc(64), SDLoc(CFP), MVT::i64);
return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
- Alignment, MMOFlags, AAInfo);
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
}
if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) {
@@ -467,12 +468,12 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), Alignment,
- MMOFlags, AAInfo);
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(),
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
Ptr = DAG.getMemBasePlusOffset(Ptr, 4, dl);
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
- MinAlign(Alignment, 4U), MMOFlags, AAInfo);
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -487,7 +488,6 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
SDValue Ptr = ST->getBasePtr();
SDLoc dl(Node);
- unsigned Alignment = ST->getAlignment();
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
@@ -528,9 +528,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
"Can only promote stores to same size type");
Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
- SDValue Result =
- DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- Alignment, MMOFlags, AAInfo);
+ SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
ReplaceNode(SDValue(Node, 0), Result);
break;
}
@@ -553,7 +552,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
Value = DAG.getZeroExtendInReg(Value, dl, StVT);
SDValue Result =
DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT,
- Alignment, MMOFlags, AAInfo);
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
ReplaceNode(SDValue(Node, 0), Result);
} else if (StWidth & (StWidth - 1)) {
// If not storing a power-of-2 number of bits, expand as two stores.
@@ -575,7 +574,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
// Store the bottom RoundWidth bits.
Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- RoundVT, Alignment, MMOFlags, AAInfo);
+ RoundVT, ST->getOriginalAlign(), MMOFlags, AAInfo);
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
@@ -584,10 +583,9 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
ISD::SRL, dl, Value.getValueType(), Value,
DAG.getConstant(RoundWidth, dl,
TLI.getShiftAmountTy(Value.getValueType(), DL)));
- Hi = DAG.getTruncStore(
- Chain, dl, Hi, Ptr,
- ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
- MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, ST->getOriginalAlign(), MMOFlags, AAInfo);
} else {
// Big endian - avoid unaligned stores.
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
@@ -596,18 +594,17 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
ISD::SRL, dl, Value.getValueType(), Value,
DAG.getConstant(ExtraWidth, dl,
TLI.getShiftAmountTy(Value.getValueType(), DL)));
- Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
- RoundVT, Alignment, MMOFlags, AAInfo);
+ Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT,
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, dl,
Ptr.getValueType()));
- Lo = DAG.getTruncStore(
- Chain, dl, Value, Ptr,
- ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
- MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ Lo = DAG.getTruncStore(Chain, dl, Value, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, ST->getOriginalAlign(), MMOFlags, AAInfo);
}
// The order of the stores doesn't matter.
@@ -643,15 +640,16 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
if (TLI.isTypeLegal(StVT)) {
Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- Alignment, MMOFlags, AAInfo);
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
} else {
// The in-memory type isn't legal. Truncate to the type it would promote
// to, and then do a truncstore.
Value = DAG.getNode(ISD::TRUNCATE, dl,
TLI.getTypeToTransformTo(*DAG.getContext(), StVT),
Value);
- Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- StVT, Alignment, MMOFlags, AAInfo);
+ Result =
+ DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), StVT,
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
}
ReplaceNode(SDValue(Node, 0), Result);
@@ -721,7 +719,6 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
LLVM_DEBUG(dbgs() << "Legalizing extending load operation\n");
EVT SrcVT = LD->getMemoryVT();
unsigned SrcWidth = SrcVT.getSizeInBits();
- unsigned Alignment = LD->getAlignment();
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
@@ -748,9 +745,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
ISD::LoadExtType NewExtType =
ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
- SDValue Result =
- DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Chain, Ptr,
- LD->getPointerInfo(), NVT, Alignment, MMOFlags, AAInfo);
+ SDValue Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Chain, Ptr, LD->getPointerInfo(), NVT,
+ LD->getOriginalAlign(), MMOFlags, AAInfo);
Ch = Result.getValue(1); // The chain.
@@ -788,16 +785,15 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
// Load the bottom RoundWidth bits.
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr,
- LD->getPointerInfo(), RoundVT, Alignment, MMOFlags,
- AAInfo);
+ LD->getPointerInfo(), RoundVT, LD->getOriginalAlign(),
+ MMOFlags, AAInfo);
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags,
- AAInfo);
+ ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
// Build a factor node to remember that this load is independent of
// the other one.
@@ -817,16 +813,15 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
// Load the top RoundWidth bits.
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
- LD->getPointerInfo(), RoundVT, Alignment, MMOFlags,
- AAInfo);
+ LD->getPointerInfo(), RoundVT, LD->getOriginalAlign(),
+ MMOFlags, AAInfo);
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags,
- AAInfo);
+ ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
// Build a factor node to remember that this load is independent of
// the other one.
@@ -933,7 +928,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Result.getValueType(),
Result, DAG.getValueType(SrcVT));
else
- ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
+ ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT);
Value = ValRes;
Chain = Result.getValue(1);
break;
@@ -1009,6 +1004,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
+ case ISD::STRICT_FP_TO_FP16:
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::STRICT_LRINT:
@@ -1131,7 +1127,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
case ISD::SDIVFIX:
- case ISD::UDIVFIX: {
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
@@ -1383,19 +1381,26 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
SDValue SubStackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
// Store the subvector.
- Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, MachinePointerInfo());
+ Ch = DAG.getStore(
+ Ch, dl, Part, SubStackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
// Finally, load the updated vector.
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo);
}
SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
+ assert((Node->getOpcode() == ISD::BUILD_VECTOR ||
+ Node->getOpcode() == ISD::CONCAT_VECTORS) &&
+ "Unexpected opcode!");
+
// We can't handle this case efficiently. Allocate a sufficiently
- // aligned object on the stack, store each element into it, then load
+ // aligned object on the stack, store each operand into it, then load
// the result as a vector.
// Create the stack frame object.
EVT VT = Node->getValueType(0);
- EVT EltVT = VT.getVectorElementType();
+ EVT MemVT = isa<BuildVectorSDNode>(Node) ? VT.getVectorElementType()
+ : Node->getOperand(0).getValueType();
SDLoc dl(Node);
SDValue FIPtr = DAG.CreateStackTemporary(VT);
int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
@@ -1404,7 +1409,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
// Emit a store of each element to the stack slot.
SmallVector<SDValue, 8> Stores;
- unsigned TypeByteSize = EltVT.getSizeInBits() / 8;
+ unsigned TypeByteSize = MemVT.getSizeInBits() / 8;
assert(TypeByteSize > 0 && "Vector element type too small for stack store!");
// Store (in the right endianness) the elements to memory.
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
@@ -1413,16 +1418,15 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
unsigned Offset = TypeByteSize*i;
- SDValue Idx = DAG.getConstant(Offset, dl, FIPtr.getValueType());
- Idx = DAG.getMemBasePlusOffset(FIPtr, Idx, dl);
+ SDValue Idx = DAG.getMemBasePlusOffset(FIPtr, Offset, dl);
// If the destination vector element type is narrower than the source
// element type, only store the bits necessary.
- if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) {
+ if (MemVT.bitsLT(Node->getOperand(i).getValueType()))
Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
Node->getOperand(i), Idx,
- PtrInfo.getWithOffset(Offset), EltVT));
- } else
+ PtrInfo.getWithOffset(Offset), MemVT));
+ else
Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i),
Idx, PtrInfo.getWithOffset(Offset)));
}
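
ExpandVectorBuildThroughStack above goes through memory: every operand is stored at its byte offset in a stack temporary (with a truncating store when the in-memory element type is narrower), and the whole slot is then reloaded as the vector. A minimal standalone sketch of the same store-elements-then-load-aggregate shape, with hypothetical names and a plain byte buffer standing in for the stack temporary:

#include <array>
#include <cassert>
#include <cstring>

// Build a 4 x float "vector" through memory: one store per operand at its
// byte offset, then a single load of the whole buffer as the result.
std::array<float, 4> build_vec4(float a, float b, float c, float d) {
  unsigned char buf[4 * sizeof(float)];   // stand-in for the stack temporary
  const float elts[4] = {a, b, c, d};
  for (unsigned i = 0; i != 4; ++i)
    std::memcpy(buf + i * sizeof(float), &elts[i], sizeof(float));

  std::array<float, 4> vec;               // the final "vector load"
  std::memcpy(vec.data(), buf, sizeof(buf));
  return vec;
}

int main() {
  auto v = build_vec4(1.f, 2.f, 3.f, 4.f);
  assert(v[0] == 1.f && v[3] == 4.f);
}
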
@@ -1600,13 +1604,17 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
SDValue Size = Tmp2.getOperand(1);
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
- unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
- unsigned StackAlign =
- DAG.getSubtarget().getFrameLowering()->getStackAlignment();
- Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
- if (Align > StackAlign)
+ Align Alignment = cast<ConstantSDNode>(Tmp3)->getAlignValue();
+ const TargetFrameLowering *TFL = DAG.getSubtarget().getFrameLowering();
+ unsigned Opc =
+ TFL->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ?
+ ISD::ADD : ISD::SUB;
+
+ Align StackAlign = TFL->getStackAlign();
+ Tmp1 = DAG.getNode(Opc, dl, VT, SP, Size); // Value
+ if (Alignment > StackAlign)
Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
- DAG.getConstant(-(uint64_t)Align, dl, VT));
+ DAG.getConstant(-Alignment.value(), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
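
The reworked ExpandDYNAMIC_STACKALLOC above picks ADD or SUB from the stack growth direction and, when the requested alignment exceeds the target's stack alignment, rounds the new stack pointer with an AND against the negated alignment. A small sketch of the downward-growing case; the function name and the default alignment of 16 are assumptions for illustration:

#include <cassert>
#include <cstdint>

// Allocate `size` bytes on a downward-growing stack and realign the result
// when `align` (a power of two) is larger than the assumed 16-byte default.
uint64_t alloca_sp(uint64_t sp, uint64_t size, uint64_t align) {
  uint64_t p = sp - size;   // stack grows down, so subtract the size
  if (align > 16)
    p &= ~(align - 1);      // same as p & -align for a power-of-two alignment
  return p;
}

int main() {
  assert(alloca_sp(0x1000, 24, 8) == 0xFE8);   // no realignment needed
  assert(alloca_sp(0x1000, 24, 64) == 0xFC0);  // rounded down to 64 bytes
}
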
@@ -1968,7 +1976,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
Constant *CP = ConstantVector::get(CV);
SDValue CPIdx =
DAG.getConstantPool(CP, TLI.getPointerTy(DAG.getDataLayout()));
- unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
return DAG.getLoad(
VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
@@ -2360,36 +2368,34 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
// Get the stack frame index of a 8 byte buffer.
SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
- // word offset constant for Hi/Lo address computation
- SDValue WordOff = DAG.getConstant(sizeof(int), dl,
- StackSlot.getValueType());
- // set up Hi and Lo (into buffer) address based on endian
- SDValue Hi = StackSlot;
- SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(),
- StackSlot, WordOff);
- if (DAG.getDataLayout().isLittleEndian())
- std::swap(Hi, Lo);
-
+ SDValue Lo = Op0;
// if signed map to unsigned space
- SDValue Op0Mapped;
if (isSigned) {
- // constant used to invert sign bit (signed to unsigned mapping)
- SDValue SignBit = DAG.getConstant(0x80000000u, dl, MVT::i32);
- Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit);
- } else {
- Op0Mapped = Op0;
+ // Invert sign bit (signed to unsigned mapping).
+ Lo = DAG.getNode(ISD::XOR, dl, MVT::i32, Lo,
+ DAG.getConstant(0x80000000u, dl, MVT::i32));
}
- // store the lo of the constructed double - based on integer input
- SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op0Mapped, Lo,
+ // Initial hi portion of constructed double.
+ SDValue Hi = DAG.getConstant(0x43300000u, dl, MVT::i32);
+
+ // If this is a big endian target, swap the lo and hi data.

+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ SDValue MemChain = DAG.getEntryNode();
+
+ // Store the lo of the constructed double.
+ SDValue Store1 = DAG.getStore(MemChain, dl, Lo, StackSlot,
MachinePointerInfo());
- // initial hi portion of constructed double
- SDValue InitialHi = DAG.getConstant(0x43300000u, dl, MVT::i32);
- // store the hi of the constructed double - biased exponent
+ // Store the hi of the constructed double.
+ SDValue HiPtr = DAG.getMemBasePlusOffset(StackSlot, 4, dl);
SDValue Store2 =
- DAG.getStore(Store1, dl, InitialHi, Hi, MachinePointerInfo());
+ DAG.getStore(MemChain, dl, Hi, HiPtr, MachinePointerInfo());
+ MemChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+
// load the constructed double
SDValue Load =
- DAG.getLoad(MVT::f64, dl, Store2, StackSlot, MachinePointerInfo());
+ DAG.getLoad(MVT::f64, dl, MemChain, StackSlot, MachinePointerInfo());
// FP constant to bias correct the final result
SDValue Bias = DAG.getConstantFP(isSigned ?
BitsToDouble(0x4330000080000000ULL) :
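
The rewritten i32-to-f64 path above builds the double directly in a stack slot: the integer becomes the low word, 0x43300000 becomes the high word (an exponent of 2^52), and subtracting the matching bias constant leaves exactly the converted value. A standalone sketch of the unsigned flavour, assuming IEEE-754 doubles; the signed case additionally XORs the sign bit and subtracts BitsToDouble(0x4330000080000000ULL):

#include <cassert>
#include <cstdint>
#include <cstring>

// Unsigned 32-bit -> double via the exponent-bias trick: pack the integer
// into the mantissa of a double whose exponent is 2^52, then subtract 2^52.
double u32_to_f64(uint32_t x) {
  uint64_t bits = (uint64_t{0x43300000} << 32) | x;  // hi word | lo word
  double d;
  std::memcpy(&d, &bits, sizeof(d));                 // reinterpret the bits as a double
  return d - 4503599627370496.0;                     // subtract the 2^52 bias
}

int main() {
  assert(u32_to_f64(0) == 0.0);
  assert(u32_to_f64(123456789u) == 123456789.0);
  assert(u32_to_f64(0xFFFFFFFFu) == 4294967295.0);
}
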
@@ -2417,10 +2423,65 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
}
return Result;
}
- assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
// Code below here assumes !isSigned without checking again.
- // FIXME: This can produce slightly incorrect results. See details in
- // FIXME: https://reviews.llvm.org/D69275
+ assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+
+ // TODO: Generalize this for use with other types.
+ if ((SrcVT == MVT::i32 || SrcVT == MVT::i64) && DestVT == MVT::f32) {
+ LLVM_DEBUG(dbgs() << "Converting unsigned i32/i64 to f32\n");
+ // For unsigned conversions, convert them to signed conversions using the
+ // algorithm from the x86_64 __floatundisf in compiler_rt. That method
+ // should be valid for i32->f32 as well.
+
+ // TODO: This really should be implemented using a branch rather than a
+ // select. We happen to get lucky and machinesink does the right
+ // thing most of the time. This would be a good candidate for a
+ // pseudo-op, or, even better, for whole-function isel.
+ EVT SetCCVT = getSetCCResultType(SrcVT);
+
+ SDValue SignBitTest = DAG.getSetCC(
+ dl, SetCCVT, Op0, DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
+
+ EVT ShiftVT = TLI.getShiftAmountTy(SrcVT, DAG.getDataLayout());
+ SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
+ SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Op0, ShiftConst);
+ SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
+ SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Op0, AndConst);
+ SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
+
+ SDValue Slow, Fast;
+ if (Node->isStrictFPOpcode()) {
+ // In strict mode, we must avoid spurious exceptions, and therefore
+ // must make sure to only emit a single STRICT_SINT_TO_FP.
+ SDValue InCvt = DAG.getSelect(dl, SrcVT, SignBitTest, Or, Op0);
+ Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DestVT, MVT::Other },
+ { Node->getOperand(0), InCvt });
+ Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DestVT, MVT::Other },
+ { Fast.getValue(1), Fast, Fast });
+ Chain = Slow.getValue(1);
+ // The STRICT_SINT_TO_FP inherits the exception mode from the
+ // incoming STRICT_UINT_TO_FP node; the STRICT_FADD node can
+ // never raise any exception.
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(Node->getFlags().hasNoFPExcept());
+ Fast->setFlags(Flags);
+ Flags.setNoFPExcept(true);
+ Slow->setFlags(Flags);
+ } else {
+ SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Or);
+ Slow = DAG.getNode(ISD::FADD, dl, DestVT, SignCvt, SignCvt);
+ Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
+ }
+
+ return DAG.getSelect(dl, DestVT, SignBitTest, Slow, Fast);
+ }
+
+ // The following optimization is valid only if every value in SrcVT (when
+ // treated as signed) is representable in DestVT. Check that the mantissa
+ // size of DestVT is at least the number of bits in SrcVT - 1.
+ assert(APFloat::semanticsPrecision(DAG.EVTToAPFloatSemantics(DestVT)) >=
+ SrcVT.getSizeInBits() - 1 &&
+ "Cannot perform lossless SINT_TO_FP!");
SDValue Tmp1;
if (Node->isStrictFPOpcode()) {
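
The new i32/i64-to-f32 expansion above follows compiler-rt's __floatundisf: when the top bit of the unsigned input is set, halve the value while keeping the dropped bit sticky so rounding is unaffected, convert as signed, and double the result; otherwise convert directly. A runnable sketch of that select, with a hypothetical helper name and two's-complement casts assumed:

#include <cassert>
#include <cstdint>

// Unsigned 64-bit -> float using only signed conversions.
float u64_to_f32(uint64_t x) {
  if (static_cast<int64_t>(x) >= 0)                      // "fast" path: fits as signed
    return static_cast<float>(static_cast<int64_t>(x));

  // "Slow" path: (x >> 1) | (x & 1) keeps the low bit sticky for rounding.
  uint64_t halved = (x >> 1) | (x & 1);
  float f = static_cast<float>(static_cast<int64_t>(halved));
  return f + f;                                          // undo the halving
}

int main() {
  assert(u64_to_f32(0) == 0.0f);
  assert(u64_to_f32(1u << 20) == 1048576.0f);
  assert(u64_to_f32(~0ull) == 18446744073709551616.0f);  // 2^64 after rounding
}
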
@@ -2454,9 +2515,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
SDValue CPIdx =
DAG.getConstantPool(FudgeFactor, TLI.getPointerTy(DAG.getDataLayout()));
- unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
CPIdx = DAG.getNode(ISD::ADD, dl, CPIdx.getValueType(), CPIdx, CstOffset);
- Alignment = std::min(Alignment, 4u);
+ Alignment = commonAlignment(Alignment, 4);
SDValue FudgeInReg;
if (DestVT == MVT::f32)
FudgeInReg = DAG.getLoad(
@@ -2765,6 +2826,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
case ISD::FLT_ROUNDS_:
Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0)));
+ Results.push_back(Node->getOperand(0));
break;
case ISD::EH_RETURN:
case ISD::EH_LABEL:
@@ -3090,14 +3152,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
unsigned Idx = Mask[i];
if (Idx < NumElems)
- Ops.push_back(DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
+ DAG.getVectorIdxConstant(Idx, dl)));
else
- Ops.push_back(DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1,
- DAG.getConstant(Idx - NumElems, dl,
- TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ Ops.push_back(
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1,
+ DAG.getVectorIdxConstant(Idx - NumElems, dl)));
}
Tmp1 = DAG.getBuildVector(VT, dl, Ops);
@@ -3219,6 +3279,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res));
}
break;
+ case ISD::STRICT_FP16_TO_FP:
+ if (Node->getValueType(0) != MVT::f32) {
+ // We can extend to types bigger than f32 in two steps without changing
+ // the result. Since "f16 -> f32" is much more commonly available, give
+ // CodeGen the option of emitting that before resorting to a libcall.
+ SDValue Res =
+ DAG.getNode(ISD::STRICT_FP16_TO_FP, dl, {MVT::f32, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
+ Res = DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
+ {Node->getValueType(0), MVT::Other},
+ {Res.getValue(1), Res});
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ }
+ break;
case ISD::FP_TO_FP16:
LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n");
if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
@@ -3273,26 +3348,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::UREM:
- case ISD::SREM: {
- EVT VT = Node->getValueType(0);
- bool isSigned = Node->getOpcode() == ISD::SREM;
- unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
- unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
- Tmp2 = Node->getOperand(0);
- Tmp3 = Node->getOperand(1);
- if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
- SDVTList VTs = DAG.getVTList(VT, VT);
- Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
- Results.push_back(Tmp1);
- } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
- // X % Y -> X-X/Y*Y
- Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
- Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
- Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
+ case ISD::SREM:
+ if (TLI.expandREM(Node, Tmp1, DAG))
Results.push_back(Tmp1);
- }
break;
- }
case ISD::UDIV:
case ISD::SDIV: {
bool isSigned = Node->getOpcode() == ISD::SDIV;
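
The UREM/SREM case above now defers to TLI.expandREM, which covers the same two fallbacks the deleted block open-coded: use a combined DIVREM when available, otherwise rebuild the remainder from the quotient. The identity behind the second fallback, checked against C++'s truncating division:

#include <cassert>

// X % Y == X - (X / Y) * Y for truncating division (Y != 0, and for signed
// values no INT_MIN / -1 overflow).
int srem_via_sdiv(int x, int y) { return x - (x / y) * y; }

int main() {
  assert(srem_via_sdiv(7, 3) == 1);
  assert(srem_via_sdiv(-7, 3) == -1);
  assert(srem_via_sdiv(7, -3) == 1);
}
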
@@ -3420,7 +3479,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(TLI.expandFixedPointMul(Node, DAG));
break;
case ISD::SDIVFIX:
+ case ISD::SDIVFIXSAT:
case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT:
if (SDValue V = TLI.expandFixedPointDiv(Node->getOpcode(), SDLoc(Node),
Node->getOperand(0),
Node->getOperand(1),
@@ -3457,8 +3518,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue Overflow = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC);
// Add of the sum and the carry.
+ SDValue One = DAG.getConstant(1, dl, VT);
SDValue CarryExt =
- DAG.getZeroExtendInReg(DAG.getZExtOrTrunc(Carry, dl, VT), dl, MVT::i1);
+ DAG.getNode(ISD::AND, dl, VT, DAG.getZExtOrTrunc(Carry, dl, VT), One);
SDValue Sum2 = DAG.getNode(Op, dl, VT, Sum, CarryExt);
// Second check for overflow. If we are adding, we can only overflow if the
@@ -3780,12 +3842,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SmallVector<SDValue, 8> Scalars;
for (unsigned Idx = 0; Idx < NumElem; Idx++) {
- SDValue Ex = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(0),
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- SDValue Sh = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(1),
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Ex =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(),
+ Node->getOperand(0), DAG.getVectorIdxConstant(Idx, dl));
+ SDValue Sh =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(),
+ Node->getOperand(1), DAG.getVectorIdxConstant(Idx, dl));
Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
VT.getScalarType(), Ex, Sh));
}
@@ -3867,7 +3929,6 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
SmallVector<SDValue, 8> Results;
SDLoc dl(Node);
// FIXME: Check flags on the node to see if we can use a finite call.
- bool CanUseFiniteLibCall = TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath;
unsigned Opc = Node->getOpcode();
switch (Opc) {
case ISD::ATOMIC_FENCE: {
@@ -3976,68 +4037,28 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
break;
case ISD::FLOG:
case ISD::STRICT_FLOG:
- if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log_finite))
- ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32,
- RTLIB::LOG_FINITE_F64,
- RTLIB::LOG_FINITE_F80,
- RTLIB::LOG_FINITE_F128,
- RTLIB::LOG_FINITE_PPCF128, Results);
- else
- ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
- RTLIB::LOG_F80, RTLIB::LOG_F128,
- RTLIB::LOG_PPCF128, Results);
+ ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, RTLIB::LOG_F80,
+ RTLIB::LOG_F128, RTLIB::LOG_PPCF128, Results);
break;
case ISD::FLOG2:
case ISD::STRICT_FLOG2:
- if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log2_finite))
- ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32,
- RTLIB::LOG2_FINITE_F64,
- RTLIB::LOG2_FINITE_F80,
- RTLIB::LOG2_FINITE_F128,
- RTLIB::LOG2_FINITE_PPCF128, Results);
- else
- ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
- RTLIB::LOG2_F80, RTLIB::LOG2_F128,
- RTLIB::LOG2_PPCF128, Results);
+ ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, RTLIB::LOG2_F80,
+ RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128, Results);
break;
case ISD::FLOG10:
case ISD::STRICT_FLOG10:
- if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log10_finite))
- ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32,
- RTLIB::LOG10_FINITE_F64,
- RTLIB::LOG10_FINITE_F80,
- RTLIB::LOG10_FINITE_F128,
- RTLIB::LOG10_FINITE_PPCF128, Results);
- else
- ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
- RTLIB::LOG10_F80, RTLIB::LOG10_F128,
- RTLIB::LOG10_PPCF128, Results);
+ ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, RTLIB::LOG10_F80,
+ RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128, Results);
break;
case ISD::FEXP:
case ISD::STRICT_FEXP:
- if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp_finite))
- ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32,
- RTLIB::EXP_FINITE_F64,
- RTLIB::EXP_FINITE_F80,
- RTLIB::EXP_FINITE_F128,
- RTLIB::EXP_FINITE_PPCF128, Results);
- else
- ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
- RTLIB::EXP_F80, RTLIB::EXP_F128,
- RTLIB::EXP_PPCF128, Results);
+ ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, RTLIB::EXP_F80,
+ RTLIB::EXP_F128, RTLIB::EXP_PPCF128, Results);
break;
case ISD::FEXP2:
case ISD::STRICT_FEXP2:
- if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp2_finite))
- ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32,
- RTLIB::EXP2_FINITE_F64,
- RTLIB::EXP2_FINITE_F80,
- RTLIB::EXP2_FINITE_F128,
- RTLIB::EXP2_FINITE_PPCF128, Results);
- else
- ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
- RTLIB::EXP2_F80, RTLIB::EXP2_F128,
- RTLIB::EXP2_PPCF128, Results);
+ ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, RTLIB::EXP2_F80,
+ RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128, Results);
break;
case ISD::FTRUNC:
case ISD::STRICT_FTRUNC:
@@ -4079,6 +4100,14 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::ROUND_F128,
RTLIB::ROUND_PPCF128, Results);
break;
+ case ISD::FROUNDEVEN:
+ case ISD::STRICT_FROUNDEVEN:
+ ExpandFPLibCall(Node, RTLIB::ROUNDEVEN_F32,
+ RTLIB::ROUNDEVEN_F64,
+ RTLIB::ROUNDEVEN_F80,
+ RTLIB::ROUNDEVEN_F128,
+ RTLIB::ROUNDEVEN_PPCF128, Results);
+ break;
case ISD::FPOWI:
case ISD::STRICT_FPOWI: {
RTLIB::Libcall LC;
@@ -4107,16 +4136,8 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
}
case ISD::FPOW:
case ISD::STRICT_FPOW:
- if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_pow_finite))
- ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32,
- RTLIB::POW_FINITE_F64,
- RTLIB::POW_FINITE_F80,
- RTLIB::POW_FINITE_F128,
- RTLIB::POW_FINITE_PPCF128, Results);
- else
- ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
- RTLIB::POW_F80, RTLIB::POW_F128,
- RTLIB::POW_PPCF128, Results);
+ ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, RTLIB::POW_F80,
+ RTLIB::POW_F128, RTLIB::POW_PPCF128, Results);
break;
case ISD::LROUND:
case ISD::STRICT_LROUND:
@@ -4181,6 +4202,17 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
}
break;
+ case ISD::STRICT_FP16_TO_FP: {
+ if (Node->getValueType(0) == MVT::f32) {
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(
+ DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Node->getOperand(1), CallOptions,
+ SDLoc(Node), Node->getOperand(0));
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ }
+ break;
+ }
case ISD::FP_TO_FP16: {
RTLIB::Libcall LC =
RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16);
@@ -4188,6 +4220,19 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
Results.push_back(ExpandLibCall(LC, Node, false));
break;
}
+ case ISD::STRICT_FP_TO_FP16: {
+ RTLIB::Libcall LC =
+ RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Unable to expand strict_fp_to_fp16");
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, Node->getValueType(0), Node->getOperand(1),
+ CallOptions, SDLoc(Node), Node->getOperand(0));
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ break;
+ }
case ISD::FSUB:
case ISD::STRICT_FSUB:
ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
@@ -4289,8 +4334,13 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP:
- // Zero extend the argument.
- Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ // Zero extend the argument unless it's cttz, then use any_extend.
+ if (Node->getOpcode() == ISD::CTTZ ||
+ Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
+ Tmp1 = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Node->getOperand(0));
+ else
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+
if (Node->getOpcode() == ISD::CTTZ) {
// The count is the same in the promoted type except if the original
// value was zero. This can be handled by setting the bit just off
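
For CTTZ the promoted input gets a bit set just above the original width, so a zero input still reports the narrow type's bit width rather than the promoted one's. A quick illustration of the trick for i8 promoted to i32; __builtin_ctz is the GCC/Clang builtin and is assumed available:

#include <cassert>
#include <cstdint>

// cttz of an 8-bit value computed with a 32-bit cttz: OR in bit 8 so that a
// zero input yields 8 (the narrow width) instead of 32.
unsigned cttz8_via_cttz32(uint8_t x) {
  uint32_t wide = uint32_t(x) | (1u << 8);  // set the bit just off the top of i8
  return __builtin_ctz(wide);
}

int main() {
  assert(cttz8_via_cttz32(0) == 8);
  assert(cttz8_via_cttz32(0x10) == 4);
  assert(cttz8_via_cttz32(0x01) == 0);
}
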
@@ -4552,6 +4602,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FRINT:
case ISD::FNEARBYINT:
case ISD::FROUND:
+ case ISD::FROUNDEVEN:
case ISD::FTRUNC:
case ISD::FNEG:
case ISD::FSQRT:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index f191160dee4fc..7e8ad28f9b143 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -113,6 +113,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break;
case ISD::STRICT_FROUND:
case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break;
+ case ISD::STRICT_FROUNDEVEN:
+ case ISD::FROUNDEVEN: R = SoftenFloatRes_FROUNDEVEN(N); break;
case ISD::STRICT_FSIN:
case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break;
case ISD::STRICT_FSQRT:
@@ -125,6 +127,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break;
case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break;
+ case ISD::FREEZE: R = SoftenFloatRes_FREEZE(N); break;
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
@@ -184,6 +187,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
return BitConvertToInteger(N->getOperand(0));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FREEZE(SDNode *N) {
+ EVT Ty = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::FREEZE, SDLoc(N), Ty,
+ GetSoftenedFloat(N->getOperand(0)));
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N,
unsigned ResNo) {
SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
@@ -609,6 +618,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) {
RTLIB::ROUND_PPCF128));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FROUNDEVEN(SDNode *N) {
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::ROUNDEVEN_F32,
+ RTLIB::ROUNDEVEN_F64,
+ RTLIB::ROUNDEVEN_F80,
+ RTLIB::ROUNDEVEN_F128,
+ RTLIB::ROUNDEVEN_PPCF128));
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
RTLIB::SIN_F32,
@@ -658,8 +676,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
if (L->getExtensionType() == ISD::NON_EXTLOAD) {
NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl,
L->getChain(), L->getBasePtr(), L->getOffset(),
- L->getPointerInfo(), NVT, L->getAlignment(), MMOFlags,
- L->getAAInfo());
+ L->getPointerInfo(), NVT, L->getOriginalAlign(),
+ MMOFlags, L->getAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -669,8 +687,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
// Do a non-extending load followed by FP_EXTEND.
NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, L->getMemoryVT(),
dl, L->getChain(), L->getBasePtr(), L->getOffset(),
- L->getPointerInfo(), L->getMemoryVT(), L->getAlignment(),
- MMOFlags, L->getAAInfo());
+ L->getPointerInfo(), L->getMemoryVT(),
+ L->getOriginalAlign(), MMOFlags, L->getAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -1166,10 +1184,13 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break;
case ISD::STRICT_FPOWI:
case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break;
+ case ISD::FREEZE: ExpandFloatRes_FREEZE(N, Lo, Hi); break;
case ISD::STRICT_FRINT:
case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break;
case ISD::STRICT_FROUND:
case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break;
+ case ISD::STRICT_FROUNDEVEN:
+ case ISD::FROUNDEVEN: ExpandFloatRes_FROUNDEVEN(N, Lo, Hi); break;
case ISD::STRICT_FSIN:
case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break;
case ISD::STRICT_FSQRT:
@@ -1459,6 +1480,17 @@ void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
RTLIB::POWI_PPCF128), Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FREEZE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+
+ SDLoc dl(N);
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FREEZE, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::FREEZE, dl, Hi.getValueType(), Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N,
SDValue &Lo, SDValue &Hi) {
ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
@@ -1485,6 +1517,16 @@ void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N,
RTLIB::ROUND_PPCF128), Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FROUNDEVEN(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::ROUNDEVEN_F32,
+ RTLIB::ROUNDEVEN_F64,
+ RTLIB::ROUNDEVEN_F80,
+ RTLIB::ROUNDEVEN_F128,
+ RTLIB::ROUNDEVEN_PPCF128), Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
SDValue &Lo, SDValue &Hi) {
ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
@@ -2117,6 +2159,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FNEG:
case ISD::FRINT:
case ISD::FROUND:
+ case ISD::FROUNDEVEN:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
@@ -2328,12 +2371,10 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) {
// Load the value as an integer value with the same number of bits.
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
- SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), IVT,
- SDLoc(N), L->getChain(), L->getBasePtr(),
- L->getOffset(), L->getPointerInfo(), IVT,
- L->getAlignment(),
- L->getMemOperand()->getFlags(),
- L->getAAInfo());
+ SDValue newL = DAG.getLoad(
+ L->getAddressingMode(), L->getExtensionType(), IVT, SDLoc(N),
+ L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), IVT,
+ L->getOriginalAlign(), L->getMemOperand()->getFlags(), L->getAAInfo());
// Legalize the chain result by replacing uses of the old value chain with the
// new one
ReplaceValueWith(SDValue(N, 1), newL.getValue(1));
@@ -2412,3 +2453,421 @@ SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) {
}
+//===----------------------------------------------------------------------===//
+// Half Result Soft Promotion
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
+ LLVM_DEBUG(dbgs() << "Soft promote half result " << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n");
+ SDValue R = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) {
+ LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n");
+ return;
+ }
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SoftPromoteHalfResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to soft promote this operator's result!");
+
+ case ISD::BITCAST: R = SoftPromoteHalfRes_BITCAST(N); break;
+ case ISD::ConstantFP: R = SoftPromoteHalfRes_ConstantFP(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ R = SoftPromoteHalfRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::FCOPYSIGN: R = SoftPromoteHalfRes_FCOPYSIGN(N); break;
+ case ISD::STRICT_FP_ROUND:
+ case ISD::FP_ROUND: R = SoftPromoteHalfRes_FP_ROUND(N); break;
+
+ // Unary FP Operations
+ case ISD::FABS:
+ case ISD::FCBRT:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FREEZE:
+ case ISD::FRINT:
+ case ISD::FROUND:
+ case ISD::FROUNDEVEN:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::FCANONICALIZE: R = SoftPromoteHalfRes_UnaryOp(N); break;
+
+ // Binary FP Operations
+ case ISD::FADD:
+ case ISD::FDIV:
+ case ISD::FMAXIMUM:
+ case ISD::FMINIMUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINNUM:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FREM:
+ case ISD::FSUB: R = SoftPromoteHalfRes_BinOp(N); break;
+
+ case ISD::FMA: // FMA is same as FMAD
+ case ISD::FMAD: R = SoftPromoteHalfRes_FMAD(N); break;
+
+ case ISD::FPOWI: R = SoftPromoteHalfRes_FPOWI(N); break;
+
+ case ISD::LOAD: R = SoftPromoteHalfRes_LOAD(N); break;
+ case ISD::SELECT: R = SoftPromoteHalfRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = SoftPromoteHalfRes_SELECT_CC(N); break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: R = SoftPromoteHalfRes_XINT_TO_FP(N); break;
+ case ISD::UNDEF: R = SoftPromoteHalfRes_UNDEF(N); break;
+ case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
+ }
+
+ if (R.getNode())
+ SetSoftPromotedHalf(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BITCAST(SDNode *N) {
+ return BitConvertToInteger(N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_ConstantFP(SDNode *N) {
+ ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
+
+ // Get the (bit-cast) APInt of the APFloat and build an integer constant
+ return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN),
+ MVT::i16);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
+ NewOp.getValueType().getVectorElementType(), NewOp,
+ N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FCOPYSIGN(SDNode *N) {
+ SDValue LHS = GetSoftPromotedHalf(N->getOperand(0));
+ SDValue RHS = BitConvertToInteger(N->getOperand(1));
+ SDLoc dl(N);
+
+ EVT LVT = LHS.getValueType();
+ EVT RVT = RHS.getValueType();
+
+ unsigned LSize = LVT.getSizeInBits();
+ unsigned RSize = RVT.getSizeInBits();
+
+ // First get the sign bit of second operand.
+ SDValue SignBit = DAG.getNode(
+ ISD::SHL, dl, RVT, DAG.getConstant(1, dl, RVT),
+ DAG.getConstant(RSize - 1, dl,
+ TLI.getShiftAmountTy(RVT, DAG.getDataLayout())));
+ SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit);
+
+ // Shift right or sign-extend it if the two operands have different types.
+ int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits();
+ if (SizeDiff > 0) {
+ SignBit =
+ DAG.getNode(ISD::SRL, dl, RVT, SignBit,
+ DAG.getConstant(SizeDiff, dl,
+ TLI.getShiftAmountTy(SignBit.getValueType(),
+ DAG.getDataLayout())));
+ SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit);
+ } else if (SizeDiff < 0) {
+ SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit);
+ SignBit =
+ DAG.getNode(ISD::SHL, dl, LVT, SignBit,
+ DAG.getConstant(-SizeDiff, dl,
+ TLI.getShiftAmountTy(SignBit.getValueType(),
+ DAG.getDataLayout())));
+ }
+
+ // Clear the sign bit of the first operand.
+ SDValue Mask = DAG.getNode(
+ ISD::SHL, dl, LVT, DAG.getConstant(1, dl, LVT),
+ DAG.getConstant(LSize - 1, dl,
+ TLI.getShiftAmountTy(LVT, DAG.getDataLayout())));
+ Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, dl, LVT));
+ LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask);
+
+ // Or the value with the sign bit.
+ return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FMAD(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
+ SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1));
+ SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2));
+ SDLoc dl(N);
+
+ // Promote to the larger FP type.
+ Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0);
+ Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1);
+ Op2 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op2);
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1, Op2);
+
+ // Convert back to FP16 as an integer.
+ return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FPOWI(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
+ SDValue Op1 = N->getOperand(1);
+ SDLoc dl(N);
+
+ Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0);
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1);
+
+ // Convert back to FP16 as an integer.
+ return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) {
+ if (N->isStrictFPOpcode()) {
+ SDValue Res =
+ DAG.getNode(ISD::STRICT_FP_TO_FP16, SDLoc(N), {MVT::i16, MVT::Other},
+ {N->getOperand(0), N->getOperand(1)});
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+ }
+
+ return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), MVT::i16, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_LOAD(SDNode *N) {
+ LoadSDNode *L = cast<LoadSDNode>(N);
+
+ // Load the value as an integer value with the same number of bits.
+ assert(L->getExtensionType() == ISD::NON_EXTLOAD && "Unexpected extension!");
+ SDValue NewL =
+ DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), MVT::i16,
+ SDLoc(N), L->getChain(), L->getBasePtr(), L->getOffset(),
+ L->getPointerInfo(), MVT::i16, L->getOriginalAlign(),
+ L->getMemOperand()->getFlags(), L->getAAInfo());
+ // Legalize the chain result by replacing uses of the old value chain with the
+ // new one
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return NewL;
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_SELECT(SDNode *N) {
+ SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1));
+ SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2));
+ return DAG.getSelect(SDLoc(N), Op1.getValueType(), N->getOperand(0), Op1,
+ Op2);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_SELECT_CC(SDNode *N) {
+ SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2));
+ SDValue Op3 = GetSoftPromotedHalf(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), Op2.getValueType(),
+ N->getOperand(0), N->getOperand(1), Op2, Op3,
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_XINT_TO_FP(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+
+ // Round the value to the softened type.
+ return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(MVT::i16);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UnaryOp(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftPromotedHalf(N->getOperand(0));
+ SDLoc dl(N);
+
+ // Promote to the larger FP type.
+ Op = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op);
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op);
+
+ // Convert back to FP16 as an integer.
+ return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BinOp(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
+ SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1));
+ SDLoc dl(N);
+
+ // Promote to the larger FP type.
+ Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0);
+ Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1);
+
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1);
+
+ // Convert back to FP16 as an integer.
+ return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
+}
+
+//===----------------------------------------------------------------------===//
+// Half Operand Soft Promotion
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Soft promote half operand " << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
+ LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n");
+ return false;
+ }
+
+ // Nodes that use a promotion-requiring floating point operand, but don't
+ // produce a soft promotion-requiring floating point result, need to be
+ // legalized to use the soft promoted float operand. Nodes that produce at
+ // least one soft promotion-requiring floating point result have their
+ // operands legalized as a part of PromoteFloatResult.
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "SoftPromoteHalfOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ llvm_unreachable("Do not know how to soft promote this operator's operand!");
+
+ case ISD::BITCAST: Res = SoftPromoteHalfOp_BITCAST(N); break;
+ case ISD::FCOPYSIGN: Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break;
+ case ISD::STRICT_FP_EXTEND:
+ case ISD::FP_EXTEND: Res = SoftPromoteHalfOp_FP_EXTEND(N); break;
+ case ISD::SELECT_CC: Res = SoftPromoteHalfOp_SELECT_CC(N, OpNo); break;
+ case ISD::SETCC: Res = SoftPromoteHalfOp_SETCC(N); break;
+ case ISD::STORE: Res = SoftPromoteHalfOp_STORE(N, OpNo); break;
+ }
+
+ if (!Res.getNode())
+ return false;
+
+ assert(Res.getNode() != N && "Expected a new node!");
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_BITCAST(SDNode *N) {
+ SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
+
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FCOPYSIGN(SDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 1 && "Only Operand 1 must need promotion here");
+ SDValue Op1 = N->getOperand(1);
+ SDLoc dl(N);
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op1.getValueType());
+
+ Op1 = GetSoftPromotedHalf(Op1);
+ Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1);
+
+ return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), N->getOperand(0),
+ Op1);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Op = GetSoftPromotedHalf(N->getOperand(IsStrict ? 1 : 0));
+
+ if (IsStrict) {
+ SDValue Res =
+ DAG.getNode(ISD::STRICT_FP16_TO_FP, SDLoc(N),
+ {N->getValueType(0), MVT::Other}, {N->getOperand(0), Op});
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+ }
+
+ return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ SDLoc dl(N);
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
+
+ Op = GetSoftPromotedHalf(Op);
+
+ SDValue Res = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op);
+
+ return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Res);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_SELECT_CC(SDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 0 && "Can only soften the comparison values");
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDLoc dl(N);
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op0.getValueType());
+
+ Op0 = GetSoftPromotedHalf(Op0);
+ Op1 = GetSoftPromotedHalf(Op1);
+
+ // Promote to the larger FP type.
+ Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0);
+ Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1);
+
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0), Op0, Op1,
+ N->getOperand(2), N->getOperand(3), N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_SETCC(SDNode *N) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ SDLoc dl(N);
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op0.getValueType());
+
+ Op0 = GetSoftPromotedHalf(Op0);
+ Op1 = GetSoftPromotedHalf(Op1);
+
+ // Promote to the larger FP type.
+ Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0);
+ Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1);
+
+ return DAG.getSetCC(SDLoc(N), N->getValueType(0), Op0, Op1, CCCode);
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "Can only soften the stored value!");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Val = ST->getValue();
+ SDLoc dl(N);
+
+ assert(!ST->isTruncatingStore() && "Unexpected truncating store.");
+ SDValue Promoted = GetSoftPromotedHalf(Val);
+ return DAG.getStore(ST->getChain(), dl, Promoted, ST->getBasePtr(),
+ ST->getMemOperand());
+}
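
SoftPromoteHalfRes_FCOPYSIGN above assembles its result purely from integer bit operations: isolate the sign bit of the second operand, shift it across the size difference, clear the first operand's sign bit, and OR the two together. The same sequence sketched for a float magnitude and a double sign source (rather than the i16 half carrier), so it runs standalone; the function name is ours:

#include <cassert>
#include <cstdint>
#include <cstring>

// copysign implemented on the raw bit patterns, mirroring the shift/mask
// sequence used when the sign source is wider than the destination.
float copysign_f32_from_f64(float mag, double sgn) {
  uint32_t m;
  uint64_t s;
  std::memcpy(&m, &mag, sizeof(m));
  std::memcpy(&s, &sgn, sizeof(s));

  uint64_t signBit = s & (uint64_t{1} << 63);  // sign bit of the wider source
  uint32_t sign32 = uint32_t(signBit >> 32);   // shift right by the 64 - 32 size difference
  m = (m & 0x7FFFFFFFu) | sign32;              // clear the old sign, insert the new one

  float out;
  std::memcpy(&out, &m, sizeof(out));
  return out;
}

int main() {
  assert(copysign_f32_from_f64(3.0f, -1.0) == -3.0f);
  assert(copysign_f32_from_f64(-2.5f, 1.0) == 2.5f);
}
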
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 0e46f8d68f839..74071f763dbf0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -91,6 +91,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
+ case ISD::VSCALE: Res = PromoteIntRes_VSCALE(N); break;
case ISD::EXTRACT_SUBVECTOR:
Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break;
@@ -161,7 +162,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break;
case ISD::SDIVFIX:
- case ISD::UDIVFIX: Res = PromoteIntRes_DIVFIX(N); break;
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT: Res = PromoteIntRes_DIVFIX(N); break;
case ISD::ABS: Res = PromoteIntRes_ABS(N); break;
@@ -198,6 +201,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::VECREDUCE_UMIN:
Res = PromoteIntRes_VECREDUCE(N);
break;
+
+ case ISD::FREEZE:
+ Res = PromoteIntRes_FREEZE(N);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -271,8 +278,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N,
return Res.getValue(1);
}
- SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ // Op2 is used for the comparison and thus must be extended according to the
+ // target's atomic operations. Op3 is merely stored and so can be left alone.
+ SDValue Op2 = N->getOperand(2);
SDValue Op3 = GetPromotedInteger(N->getOperand(3));
+ switch (TLI.getExtendForAtomicCmpSwapArg()) {
+ case ISD::SIGN_EXTEND:
+ Op2 = SExtPromotedInteger(Op2);
+ break;
+ case ISD::ZERO_EXTEND:
+ Op2 = ZExtPromotedInteger(Op2);
+ break;
+ case ISD::ANY_EXTEND:
+ Op2 = GetPromotedInteger(Op2);
+ break;
+ default:
+ llvm_unreachable("Invalid atomic op extension");
+ }
+
SDVTList VTs =
DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other);
SDValue Res = DAG.getAtomicCmpSwap(
@@ -303,6 +326,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
case TargetLowering::TypeSoftenFloat:
// Promote the integer operand by hand.
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
+ case TargetLowering::TypeSoftPromoteHalf:
+ // Promote the integer operand by hand.
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftPromotedHalf(InOp));
case TargetLowering::TypePromoteFloat: {
// Convert the promoted float by hand.
if (!NOutVT.isVector())
@@ -318,6 +344,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
BitConvertToInteger(GetScalarizedVector(InOp)));
break;
+ case TargetLowering::TypeScalarizeScalableVector:
+ report_fatal_error("Scalarization of scalable vectors is not supported.");
case TargetLowering::TypeSplitVector: {
if (!NOutVT.isVector()) {
// For example, i32 = BITCAST v2i16 on alpha. Convert the split
@@ -370,9 +398,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
OutVT.getVectorNumElements() * Scale);
if (isTypeLegal(WideOutVT)) {
InOp = DAG.getBitcast(WideOutVT, GetWidenedVector(InOp));
- MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, InOp,
- DAG.getConstant(0, dl, IdxTy));
+ DAG.getVectorIdxConstant(0, dl));
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp);
}
}
@@ -396,6 +423,12 @@ static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI,
return ShiftVT;
}
+SDValue DAGTypeLegalizer::PromoteIntRes_FREEZE(SDNode *N) {
+ SDValue V = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::FREEZE, SDLoc(N),
+ V.getValueType(), V);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
EVT OVT = N->getValueType(0);
@@ -558,7 +591,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FLT_ROUNDS(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
- return DAG.getNode(N->getOpcode(), dl, NVT);
+ SDValue Res =
+ DAG.getNode(N->getOpcode(), dl, {NVT, MVT::Other}, N->getOperand(0));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
}
SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
@@ -578,8 +617,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
DAG.getValueType(N->getOperand(0).getValueType()));
if (N->getOpcode() == ISD::ZERO_EXTEND)
- return DAG.getZeroExtendInReg(Res, dl,
- N->getOperand(0).getValueType().getScalarType());
+ return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType());
assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!");
return Res;
}
@@ -781,22 +819,51 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
N->getOperand(2));
}
+static SDValue SaturateWidenedDIVFIX(SDValue V, SDLoc &dl,
+ unsigned SatW, bool Signed,
+ const TargetLowering &TLI,
+ SelectionDAG &DAG) {
+ EVT VT = V.getValueType();
+ unsigned VTW = VT.getScalarSizeInBits();
+
+ if (!Signed) {
+ // Saturate to the unsigned maximum by getting the minimum of V and the
+ // maximum.
+ return DAG.getNode(ISD::UMIN, dl, VT, V,
+ DAG.getConstant(APInt::getLowBitsSet(VTW, SatW),
+ dl, VT));
+ }
+
+ // Saturate to the signed maximum (the low SatW - 1 bits) by taking the
+ // signed minimum of it and V.
+ V = DAG.getNode(ISD::SMIN, dl, VT, V,
+ DAG.getConstant(APInt::getLowBitsSet(VTW, SatW - 1),
+ dl, VT));
+ // Saturate to the signed minimum (the high VTW - SatW + 1 bits) by taking
+ // the signed maximum of it and V.
+ V = DAG.getNode(ISD::SMAX, dl, VT, V,
+ DAG.getConstant(APInt::getHighBitsSet(VTW, VTW - SatW + 1),
+ dl, VT));
+ return V;
+}
+
static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS,
- unsigned Scale, const TargetLowering &TLI,
- SelectionDAG &DAG) {
+ unsigned Scale, const TargetLowering &TLI,
+ SelectionDAG &DAG, unsigned SatW = 0) {
EVT VT = LHS.getValueType();
- bool Signed = N->getOpcode() == ISD::SDIVFIX;
+ unsigned VTSize = VT.getScalarSizeInBits();
+ bool Signed = N->getOpcode() == ISD::SDIVFIX ||
+ N->getOpcode() == ISD::SDIVFIXSAT;
+ bool Saturating = N->getOpcode() == ISD::SDIVFIXSAT ||
+ N->getOpcode() == ISD::UDIVFIXSAT;
SDLoc dl(N);
- // See if we can perform the division in this type without widening.
- if (SDValue V = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale,
- DAG))
- return V;
-
- // If that didn't work, double the type width and try again. That must work,
- // or something is wrong.
- EVT WideVT = EVT::getIntegerVT(*DAG.getContext(),
- VT.getScalarSizeInBits() * 2);
+ // Widen the types by a factor of two. This is guaranteed to expand, since it
+ // will always have enough high bits in the LHS to shift into.
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
+ if (VT.isVector())
+ WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
+ VT.getVectorElementCount());
if (Signed) {
LHS = DAG.getSExtOrTrunc(LHS, dl, WideVT);
RHS = DAG.getSExtOrTrunc(RHS, dl, WideVT);
@@ -805,18 +872,28 @@ static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS,
RHS = DAG.getZExtOrTrunc(RHS, dl, WideVT);
}
- // TODO: Saturation.
-
SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale,
DAG);
assert(Res && "Expanding DIVFIX with wide type failed?");
+ if (Saturating) {
+ // If the caller has told us to saturate at something less, use that width
+ // instead of the type before doubling. However, it cannot be more than
+ // what we just widened!
+ assert(SatW <= VTSize &&
+ "Tried to saturate to more than the original type?");
+ Res = SaturateWidenedDIVFIX(Res, dl, SatW == 0 ? VTSize : SatW, Signed,
+ TLI, DAG);
+ }
return DAG.getZExtOrTrunc(Res, dl, VT);
}
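
Doubling the width always succeeds because expandFixedPointDiv has to shift the dividend left by Scale before dividing, and in the doubled type that shift cannot lose bits. A rough scalar model of the widened path, with hypothetical Q4.4 i8 values and ignoring the rounding adjustments the real expansion performs (the helper name is illustrative, not the TargetLowering API):

```cpp
#include <cstdint>
#include <cstdio>

// Model of an i8 fixed-point divide done in the doubled (i16) type.
int8_t sdivfix_i8(int8_t LHS, int8_t RHS, unsigned Scale) {
  int16_t WideLHS = int16_t(LHS) << Scale; // the doubled width has room for the shift
  return int8_t(WideLHS / int16_t(RHS));   // truncate back, as getZExtOrTrunc does
}

int main() {
  // Q4.4: 2.5 / 0.5 == 5.0, i.e. 0x28 / 0x08 -> 0x50.
  std::printf("0x%02x\n", unsigned(uint8_t(sdivfix_i8(0x28, 0x08, 4))));
  return 0;
}
```
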
SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) {
SDLoc dl(N);
SDValue Op1Promoted, Op2Promoted;
- bool Signed = N->getOpcode() == ISD::SDIVFIX;
+ bool Signed = N->getOpcode() == ISD::SDIVFIX ||
+ N->getOpcode() == ISD::SDIVFIXSAT;
+ bool Saturating = N->getOpcode() == ISD::SDIVFIXSAT ||
+ N->getOpcode() == ISD::UDIVFIXSAT;
if (Signed) {
Op1Promoted = SExtPromotedInteger(N->getOperand(0));
Op2Promoted = SExtPromotedInteger(N->getOperand(1));
@@ -827,23 +904,41 @@ SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) {
EVT PromotedType = Op1Promoted.getValueType();
unsigned Scale = N->getConstantOperandVal(2);
- SDValue Res;
// If the type is already legal and the operation is legal in that type, we
// should not early expand.
if (TLI.isTypeLegal(PromotedType)) {
TargetLowering::LegalizeAction Action =
TLI.getFixedPointOperationAction(N->getOpcode(), PromotedType, Scale);
- if (Action == TargetLowering::Legal || Action == TargetLowering::Custom)
- Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
- Op2Promoted, N->getOperand(2));
+ if (Action == TargetLowering::Legal || Action == TargetLowering::Custom) {
+ EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
+ unsigned Diff = PromotedType.getScalarSizeInBits() -
+ N->getValueType(0).getScalarSizeInBits();
+ if (Saturating)
+ Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
+ DAG.getConstant(Diff, dl, ShiftTy));
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
+ Op2Promoted, N->getOperand(2));
+ if (Saturating)
+ Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, PromotedType, Res,
+ DAG.getConstant(Diff, dl, ShiftTy));
+ return Res;
+ }
}
- if (!Res)
- Res = earlyExpandDIVFIX(N, Op1Promoted, Op2Promoted, Scale, TLI, DAG);
-
- // TODO: Saturation.
-
- return Res;
+ // See if we can perform the division in this type without expanding.
+ if (SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, Op1Promoted,
+ Op2Promoted, Scale, DAG)) {
+ if (Saturating)
+ Res = SaturateWidenedDIVFIX(Res, dl,
+ N->getValueType(0).getScalarSizeInBits(),
+ Signed, TLI, DAG);
+ return Res;
+ }
+ // If we cannot, expand it to twice the type width. If we are saturating, give
+ // it the original width as a saturating width so we don't need to emit
+ // two saturations.
+ return earlyExpandDIVFIX(N, Op1Promoted, Op2Promoted, Scale, TLI, DAG,
+ N->getValueType(0).getScalarSizeInBits());
}
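
When the promoted type is legal, the pre-shift by Diff moves the narrow type's value range up to the top of the promoted type, so the wide operation saturates at points that correspond exactly to the narrow limits; shifting back down (arithmetically for the signed case) then yields an already-saturated narrow value. A hedged worked example with i8 promoted to i16 (Diff = 8, scale 0); the saturating divide is modelled by a clamp rather than the actual ISD node, and the helper name is made up:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>

// Stand-in for the saturating divide performed in the promoted (i16) type.
int16_t sat_sdiv_i16(int32_t A, int32_t B) {
  int32_t Q = A / B;
  return int16_t(std::clamp(Q, int32_t(INT16_MIN), int32_t(INT16_MAX)));
}

int main() {
  const unsigned Diff = 16 - 8;                 // promoted bits - original bits
  int16_t LHS = -128, RHS = -1;                 // i8 operands, sign-extended
  int16_t Wide = sat_sdiv_i16(int32_t(LHS) << Diff, RHS);
  std::printf("%d\n", int8_t(Wide >> Diff));    // prints 127, the i8 maximum
  return 0;
}
```
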
SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
@@ -1048,8 +1143,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
SDValue WideExt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, WideTrunc);
// Extract the low NVT subvector.
- MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
- SDValue ZeroIdx = DAG.getConstant(0, dl, IdxTy);
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, dl);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, WideExt, ZeroIdx);
}
}
@@ -1076,7 +1170,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
// Calculate the overflow flag: zero extend the arithmetic result from
// the original type.
- SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT.getScalarType());
+ SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT);
// Overflowed if and only if this is not equal to Res.
Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
@@ -1181,6 +1275,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
N->getValueType(0)));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_VSCALE(SDNode *N) {
+ EVT VT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ APInt MulImm = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
+ return DAG.getVScale(SDLoc(N), VT, MulImm.sextOrSelf(VT.getSizeInBits()));
+}
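
Promoting VSCALE only needs to carry the multiplier over to the wider result type; sextOrSelf grows the constant's bit width without changing its value. A tiny sketch of that operand handling using APInt directly (the values are made up for illustration):

```cpp
#include "llvm/ADT/APInt.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  // An i8 multiplier of -4, as it might appear on a VSCALE node.
  APInt MulImm(/*numBits=*/8, /*val=*/uint64_t(-4), /*isSigned=*/true);
  // Sign-extend to the promoted width; the multiplier's value is unchanged.
  APInt Promoted = MulImm.sext(32);
  outs() << Promoted.getSExtValue() << "\n";   // prints -4
  return 0;
}
```
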
+
SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
SDValue Chain = N->getOperand(0); // Get the chain.
SDValue Ptr = N->getOperand(1); // Get the pointer.
@@ -1233,7 +1334,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
LLVM_DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG);
dbgs() << "\n");
SDValue Res = SDValue();
-
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n");
return false;
@@ -1307,7 +1407,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
case ISD::SDIVFIX:
- case ISD::UDIVFIX: Res = PromoteIntOp_FIX(N); break;
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT: Res = PromoteIntOp_FIX(N); break;
case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break;
@@ -1330,10 +1432,17 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
if (Res.getNode() == N)
return true;
- assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ const bool IsStrictFp = N->isStrictFPOpcode();
+ assert(Res.getValueType() == N->getValueType(0) &&
+ N->getNumValues() == (IsStrictFp ? 2 : 1) &&
"Invalid operand expansion");
+ LLVM_DEBUG(dbgs() << "Replacing: "; N->dump(&DAG); dbgs() << " with: ";
+ Res.dump());
ReplaceValueWith(SDValue(N, 0), Res);
+ if (IsStrictFp)
+ ReplaceValueWith(SDValue(N, 1), SDValue(Res.getNode(), 1));
+
return false;
}
@@ -1614,7 +1723,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
NewOps[OpNo] = Mask;
- return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+ SDNode *Res = DAG.UpdateNodeOperands(N, NewOps);
+ if (Res == N)
+ return SDValue(Res, 0);
+
+  // The update triggered CSE; do our own replacement since the caller can't.
+ ReplaceValueWith(SDValue(N, 0), SDValue(Res, 0));
+ ReplaceValueWith(SDValue(N, 1), SDValue(Res, 1));
+ return SDValue();
}
SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
@@ -1635,7 +1751,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
} else
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
- return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+ SDNode *Res = DAG.UpdateNodeOperands(N, NewOps);
+ if (Res == N)
+ return SDValue(Res, 0);
+
+  // The update triggered CSE; do our own replacement since the caller can't.
+ ReplaceValueWith(SDValue(N, 0), SDValue(Res, 0));
+ ReplaceValueWith(SDValue(N, 1), SDValue(Res, 1));
+ return SDValue();
}
SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
@@ -1676,8 +1799,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
SDLoc dl(N);
SDValue Op = GetPromotedInteger(N->getOperand(0));
Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
- return DAG.getZeroExtendInReg(Op, dl,
- N->getOperand(0).getValueType().getScalarType());
+ return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
}
SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) {
@@ -1786,6 +1908,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+ case ISD::FREEZE: SplitRes_FREEZE(N, Lo, Hi); break;
case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
@@ -1908,7 +2031,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break;
case ISD::SDIVFIX:
- case ISD::UDIVFIX: ExpandIntRes_DIVFIX(N, Lo, Hi); break;
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT: ExpandIntRes_DIVFIX(N, Lo, Hi); break;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
@@ -2666,10 +2791,15 @@ void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo,
unsigned NBitWidth = NVT.getSizeInBits();
EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
- Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, NVT);
+ Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, {NVT, MVT::Other}, N->getOperand(0));
+ SDValue Chain = Lo.getValue(1);
// The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
DAG.getConstant(NBitWidth - 1, dl, ShiftAmtTy));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
}
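
Because FLT_ROUNDS_ only produces small values (-1 through 3), the expanded high word can be derived from the low word's sign alone. A minimal illustration, on plain integers rather than DAG nodes, of how the two halves reassemble:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  int32_t Lo = -1;                          // FLT_ROUNDS_: -1 = indeterminable
  int32_t Hi = Lo < 0 ? -1 : 0;             // what ISD::SRA by NBitWidth-1 yields
  int64_t Full = (int64_t(Hi) << 32) | uint32_t(Lo);
  std::printf("%lld\n", (long long)Full);   // prints -1
  return 0;
}
```
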
void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
@@ -2683,6 +2813,12 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
Op = GetPromotedFloat(Op);
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftPromoteHalf) {
+ EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
+ Op = GetSoftPromotedHalf(Op);
+ Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
+ }
+
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -2706,6 +2842,12 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
Op = GetPromotedFloat(Op);
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftPromoteHalf) {
+ EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
+ Op = GetSoftPromotedHalf(Op);
+ Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
+ }
+
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -2800,7 +2942,6 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
ISD::LoadExtType ExtType = N->getExtensionType();
- unsigned Alignment = N->getAlignment();
MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
AAMDNodes AAInfo = N->getAAInfo();
SDLoc dl(N);
@@ -2811,7 +2952,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
EVT MemVT = N->getMemoryVT();
Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), MemVT,
- Alignment, MMOFlags, AAInfo);
+ N->getOriginalAlign(), MMOFlags, AAInfo);
// Remember the chain.
Ch = Lo.getValue(1);
@@ -2833,8 +2974,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
}
} else if (DAG.getDataLayout().isLittleEndian()) {
// Little-endian - low bits are at low addresses.
- Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), Alignment, MMOFlags,
- AAInfo);
+ Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
+ N->getOriginalAlign(), MMOFlags, AAInfo);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
@@ -2845,7 +2986,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
- MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ N->getOriginalAlign(), MMOFlags, AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -2863,7 +3004,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
EVT::getIntegerVT(*DAG.getContext(),
MemVT.getSizeInBits() - ExcessBits),
- Alignment, MMOFlags, AAInfo);
+ N->getOriginalAlign(), MMOFlags, AAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
@@ -2871,7 +3012,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
- MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ N->getOriginalAlign(), MMOFlags, AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -3226,8 +3367,15 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandIntRes_DIVFIX(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- SDValue Res = earlyExpandDIVFIX(N, N->getOperand(0), N->getOperand(1),
- N->getConstantOperandVal(2), TLI, DAG);
+ SDLoc dl(N);
+ // Try expanding in the existing type first.
+ SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, N->getOperand(0),
+ N->getOperand(1),
+ N->getConstantOperandVal(2), DAG);
+
+ if (!Res)
+ Res = earlyExpandDIVFIX(N, N->getOperand(0), N->getOperand(1),
+ N->getConstantOperandVal(2), TLI, DAG);
SplitInteger(Res, Lo, Hi);
}
@@ -4071,7 +4219,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
- unsigned Alignment = N->getAlignment();
MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
AAMDNodes AAInfo = N->getAAInfo();
SDLoc dl(N);
@@ -4082,15 +4229,16 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
if (N->getMemoryVT().bitsLE(NVT)) {
GetExpandedInteger(N->getValue(), Lo, Hi);
return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
- N->getMemoryVT(), Alignment, MMOFlags, AAInfo);
+ N->getMemoryVT(), N->getOriginalAlign(), MMOFlags,
+ AAInfo);
}
if (DAG.getDataLayout().isLittleEndian()) {
// Little-endian - low bits are at low addresses.
GetExpandedInteger(N->getValue(), Lo, Hi);
- Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags,
- AAInfo);
+ Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
+ N->getOriginalAlign(), MMOFlags, AAInfo);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
@@ -4099,9 +4247,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
- Hi = DAG.getTruncStore(
- Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
- MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ NEVT, N->getOriginalAlign(), MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -4129,8 +4277,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
}
// Store both the high bits and maybe some of the low bits.
- Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), HiVT, Alignment,
- MMOFlags, AAInfo);
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), HiVT,
+ N->getOriginalAlign(), MMOFlags, AAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
@@ -4138,7 +4286,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
- MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ N->getOriginalAlign(), MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -4186,18 +4334,43 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
- unsigned OutNumElems = OutVT.getVectorNumElements();
EVT NOutVTElem = NOutVT.getVectorElementType();
SDLoc dl(N);
SDValue BaseIdx = N->getOperand(1);
+ // TODO: We may be able to use this for types other than scalable
+ // vectors and fix those tests that expect BUILD_VECTOR to be used
+ if (OutVT.isScalableVector()) {
+ SDValue InOp0 = N->getOperand(0);
+ EVT InVT = InOp0.getValueType();
+
+    // Promote the operands and see if this is handled by target lowering;
+    // otherwise, fall back to the BUILD_VECTOR approach below.
+ if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) {
+ // Collect the (promoted) operands
+ SDValue Ops[] = { GetPromotedInteger(InOp0), BaseIdx };
+
+ EVT PromEltVT = Ops[0].getValueType().getVectorElementType();
+ assert(PromEltVT.bitsLE(NOutVTElem) &&
+ "Promoted operand has an element type greater than result");
+
+ EVT ExtVT = NOutVT.changeVectorElementType(PromEltVT);
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), ExtVT, Ops);
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Ext);
+ }
+ }
+
+ if (OutVT.isScalableVector())
+ report_fatal_error("Unable to promote scalable types using BUILD_VECTOR");
+
SDValue InOp0 = N->getOperand(0);
if (getTypeAction(InOp0.getValueType()) == TargetLowering::TypePromoteInteger)
InOp0 = GetPromotedInteger(N->getOperand(0));
EVT InVT = InOp0.getValueType();
+ unsigned OutNumElems = OutVT.getVectorNumElements();
SmallVector<SDValue, 8> Ops;
Ops.reserve(OutNumElems);
for (unsigned i = 0; i != OutNumElems; ++i) {
@@ -4319,9 +4492,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
"Unexpected number of elements");
for (unsigned j = 0; j < NumElem; ++j) {
- SDValue Ext = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Op,
- DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Op,
+ DAG.getVectorIdxConstant(j, dl));
Ops[i * NumElem + j] = DAG.getAnyExtOrTrunc(Ext, dl, OutElemTy);
}
}
@@ -4429,9 +4601,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
for (unsigned i=0; i<NumElem; ++i) {
// Extract element from incoming vector
- SDValue Ex = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Incoming,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Incoming,
+ DAG.getVectorIdxConstant(i, dl));
SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex);
NewOps.push_back(Tr);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 63ddb59fce68c..ae087d3bbd8cb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -124,6 +124,8 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
Mapped |= 128;
if (ResId && PromotedFloats.find(ResId) != PromotedFloats.end())
Mapped |= 256;
+ if (ResId && SoftPromotedHalfs.find(ResId) != SoftPromotedHalfs.end())
+ Mapped |= 512;
if (Node.getNodeId() != Processed) {
// Since we allow ReplacedValues to map deleted nodes, it may map nodes
@@ -168,12 +170,15 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
dbgs() << " WidenedVectors";
if (Mapped & 256)
dbgs() << " PromotedFloats";
+ if (Mapped & 512)
+ dbgs() << " SoftPromoteHalfs";
dbgs() << "\n";
llvm_unreachable(nullptr);
}
}
}
+#ifndef NDEBUG
  // Check that NewNodes are only used by other NewNodes.
for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
SDNode *N = NewNodes[i];
@@ -181,6 +186,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
UI != UE; ++UI)
assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
}
+#endif
}
/// This is the main entry point for the type legalizer. This does a top-down
@@ -239,6 +245,9 @@ bool DAGTypeLegalizer::run() {
case TargetLowering::TypeLegal:
LLVM_DEBUG(dbgs() << "Legal result type\n");
break;
+ case TargetLowering::TypeScalarizeScalableVector:
+ report_fatal_error(
+ "Scalarization of scalable vectors is not supported.");
// The following calls must take care of *all* of the node's results,
// not just the illegal result they were passed (this includes results
// with a legal type). Results can be remapped using ReplaceValueWith,
@@ -276,6 +285,10 @@ bool DAGTypeLegalizer::run() {
PromoteFloatResult(N, i);
Changed = true;
goto NodeDone;
+ case TargetLowering::TypeSoftPromoteHalf:
+ SoftPromoteHalfResult(N, i);
+ Changed = true;
+ goto NodeDone;
}
}
@@ -297,6 +310,9 @@ ScanOperands:
case TargetLowering::TypeLegal:
LLVM_DEBUG(dbgs() << "Legal operand\n");
continue;
+ case TargetLowering::TypeScalarizeScalableVector:
+ report_fatal_error(
+ "Scalarization of scalable vectors is not supported.");
// The following calls must either replace all of the node's results
// using ReplaceValueWith, and return "false"; or update the node's
// operands in place, and return "true".
@@ -332,6 +348,10 @@ ScanOperands:
NeedsReanalyzing = PromoteFloatOperand(N, i);
Changed = true;
break;
+ case TargetLowering::TypeSoftPromoteHalf:
+ NeedsReanalyzing = SoftPromoteHalfOperand(N, i);
+ Changed = true;
+ break;
}
break;
}
@@ -719,6 +739,16 @@ void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) {
OpIdEntry = getTableId(Result);
}
+void DAGTypeLegalizer::SetSoftPromotedHalf(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() == MVT::i16 &&
+ "Invalid type for soft-promoted half");
+ AnalyzeNewValue(Result);
+
+ auto &OpIdEntry = SoftPromotedHalfs[getTableId(Op)];
+ assert((OpIdEntry == 0) && "Node is already promoted!");
+ OpIdEntry = getTableId(Result);
+}
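
A soft-promoted half is simply its 16-bit storage encoding kept in an i16, with conversions routed through FP16_TO_FP / FP_TO_FP16 where a real floating-point value is needed (as the FP_TO_SINT/UINT expansions above do). A rough decoder, valid for normal numbers only, shows what those i16 bit patterns mean; this is illustrative, not the compiler-rt conversion:

```cpp
#include <cmath>
#include <cstdint>
#include <cstdio>

// Decode an IEEE-754 binary16 bit pattern (normal numbers only; no
// subnormals, infinities or NaNs) to float.
float halfBitsToFloat(uint16_t H) {
  int Sign = (H >> 15) & 0x1;
  int Exp  = (H >> 10) & 0x1F;
  int Frac = H & 0x3FF;
  float Mant = 1.0f + Frac / 1024.0f;
  return (Sign ? -1.0f : 1.0f) * std::ldexp(Mant, Exp - 15);
}

int main() {
  std::printf("%g %g\n", halfBitsToFloat(0x3C00), halfBitsToFloat(0xC000));
  return 0;   // prints "1 -2"
}
```
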
+
void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
// Note that in some cases vector operation operands may be greater than
// the vector element type. For example BUILD_VECTOR of type <1 x i1> with
@@ -805,9 +835,9 @@ void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
SDValue Hi) {
assert(Lo.getValueType().getVectorElementType() ==
- Op.getValueType().getVectorElementType() &&
- 2*Lo.getValueType().getVectorNumElements() ==
- Op.getValueType().getVectorNumElements() &&
+ Op.getValueType().getVectorElementType() &&
+ Lo.getValueType().getVectorElementCount() * 2 ==
+ Op.getValueType().getVectorElementCount() &&
Hi.getValueType() == Lo.getValueType() &&
"Invalid type for split vector");
// Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
@@ -859,12 +889,19 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
SDLoc dl(Op);
// Create the stack frame object. Make sure it is aligned for both
// the source and destination types.
- SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
+
+  // In cases where the vector is illegal, it will be broken down into parts
+  // and stored in parts; we should use the alignment of the smallest part.
+ Align DestAlign = DAG.getReducedAlign(DestVT, /*UseABI=*/false);
+ Align OpAlign = DAG.getReducedAlign(Op.getValueType(), /*UseABI=*/false);
+ Align Align = std::max(DestAlign, OpAlign);
+ SDValue StackPtr =
+ DAG.CreateStackTemporary(Op.getValueType().getStoreSize(), Align);
// Emit a store to the stack slot.
- SDValue Store =
- DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, MachinePointerInfo());
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr,
+ MachinePointerInfo(), Align);
// Result is a load from the stack slot.
- return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo());
+ return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(), Align);
}
/// Replace the node's results with custom code provided by the target and
@@ -890,17 +927,6 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
// The target didn't want to custom lower it after all.
return false;
- // When called from DAGTypeLegalizer::ExpandIntegerResult, we might need to
- // provide the same kind of custom splitting behavior.
- if (Results.size() == N->getNumValues() + 1 && LegalizeResult) {
- // We've legalized a return type by splitting it. If there is a chain,
- // replace that too.
- SetExpandedInteger(SDValue(N, 0), Results[0], Results[1]);
- if (N->getNumValues() > 1)
- ReplaceValueWith(SDValue(N, 1), Results[2]);
- return true;
- }
-
// Make everything that once used N's values now use those in Results instead.
assert(Results.size() == N->getNumValues() &&
"Custom lowering returned the wrong number of results!");
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index faae14444d51c..0fa6d653a8364 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -109,6 +109,10 @@ private:
/// supported precision, this map indicates what promoted value to use.
SmallDenseMap<TableId, TableId, 8> PromotedFloats;
+ /// For floating-point nodes that have a smaller precision than the smallest
+ /// supported precision, this map indicates the converted value to use.
+ SmallDenseMap<TableId, TableId, 8> SoftPromotedHalfs;
+
/// For float nodes that need to be expanded this map indicates which operands
/// are the expanded version of the input.
SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> ExpandedFloats;
@@ -155,7 +159,9 @@ private:
const SDValue &getSDValue(TableId &Id) {
RemapId(Id);
assert(Id && "TableId should be non-zero");
- return IdToValueMap[Id];
+ auto I = IdToValueMap.find(Id);
+ assert(I != IdToValueMap.end() && "cannot find Id in map");
+ return I->second;
}
public:
@@ -172,24 +178,30 @@ public:
bool run();
void NoteDeletion(SDNode *Old, SDNode *New) {
+ assert(Old != New && "node replaced with self");
for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
TableId NewId = getTableId(SDValue(New, i));
TableId OldId = getTableId(SDValue(Old, i));
- if (OldId != NewId)
+ if (OldId != NewId) {
ReplacedValues[OldId] = NewId;
- // Delete Node from tables.
+        // Delete Node from tables. We cannot do this when OldId == NewId,
+        // because ReplacedValues may still contain references to NewId.
+ IdToValueMap.erase(OldId);
+ PromotedIntegers.erase(OldId);
+ ExpandedIntegers.erase(OldId);
+ SoftenedFloats.erase(OldId);
+ PromotedFloats.erase(OldId);
+ SoftPromotedHalfs.erase(OldId);
+ ExpandedFloats.erase(OldId);
+ ScalarizedVectors.erase(OldId);
+ SplitVectors.erase(OldId);
+ WidenedVectors.erase(OldId);
+ }
+
ValueToIdMap.erase(SDValue(Old, i));
- IdToValueMap.erase(OldId);
- PromotedIntegers.erase(OldId);
- ExpandedIntegers.erase(OldId);
- SoftenedFloats.erase(OldId);
- PromotedFloats.erase(OldId);
- ExpandedFloats.erase(OldId);
- ScalarizedVectors.erase(OldId);
- SplitVectors.erase(OldId);
- WidenedVectors.erase(OldId);
}
}
@@ -260,7 +272,7 @@ private:
EVT OldVT = Op.getValueType();
SDLoc dl(Op);
Op = GetPromotedInteger(Op);
- return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType());
+ return DAG.getZeroExtendInReg(Op, dl, OldVT);
}
// Get a promoted operand and sign or zero extend it to the final size
@@ -274,7 +286,7 @@ private:
if (TLI.isSExtCheaperThanZExt(OldVT, Op.getValueType()))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), Op,
DAG.getValueType(OldVT));
- return DAG.getZeroExtendInReg(Op, DL, OldVT.getScalarType());
+ return DAG.getZeroExtendInReg(Op, DL, OldVT);
}
// Integer Result Promotion.
@@ -304,6 +316,7 @@ private:
SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
SDValue PromoteIntRes_FP_TO_FP16(SDNode *N);
+ SDValue PromoteIntRes_FREEZE(SDNode *N);
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N);
@@ -326,6 +339,7 @@ private:
SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_UNDEF(SDNode *N);
SDValue PromoteIntRes_VAARG(SDNode *N);
+ SDValue PromoteIntRes_VSCALE(SDNode *N);
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_ADDSUBSAT(SDNode *N);
SDValue PromoteIntRes_MULFIX(SDNode *N);
@@ -512,9 +526,11 @@ private:
SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
SDValue SoftenFloatRes_FPOW(SDNode *N);
SDValue SoftenFloatRes_FPOWI(SDNode *N);
+ SDValue SoftenFloatRes_FREEZE(SDNode *N);
SDValue SoftenFloatRes_FREM(SDNode *N);
SDValue SoftenFloatRes_FRINT(SDNode *N);
SDValue SoftenFloatRes_FROUND(SDNode *N);
+ SDValue SoftenFloatRes_FROUNDEVEN(SDNode *N);
SDValue SoftenFloatRes_FSIN(SDNode *N);
SDValue SoftenFloatRes_FSQRT(SDNode *N);
SDValue SoftenFloatRes_FSUB(SDNode *N);
@@ -584,9 +600,11 @@ private:
void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FROUNDEVEN(SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -651,6 +669,43 @@ private:
SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo);
//===--------------------------------------------------------------------===//
+ // Half soft promotion support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ SDValue GetSoftPromotedHalf(SDValue Op) {
+ TableId &PromotedId = SoftPromotedHalfs[getTableId(Op)];
+ SDValue PromotedOp = getSDValue(PromotedId);
+ assert(PromotedOp.getNode() && "Operand wasn't promoted?");
+ return PromotedOp;
+ }
+ void SetSoftPromotedHalf(SDValue Op, SDValue Result);
+
+ void SoftPromoteHalfResult(SDNode *N, unsigned ResNo);
+ SDValue SoftPromoteHalfRes_BinOp(SDNode *N);
+ SDValue SoftPromoteHalfRes_BITCAST(SDNode *N);
+ SDValue SoftPromoteHalfRes_ConstantFP(SDNode *N);
+ SDValue SoftPromoteHalfRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SoftPromoteHalfRes_FCOPYSIGN(SDNode *N);
+ SDValue SoftPromoteHalfRes_FMAD(SDNode *N);
+ SDValue SoftPromoteHalfRes_FPOWI(SDNode *N);
+ SDValue SoftPromoteHalfRes_FP_ROUND(SDNode *N);
+ SDValue SoftPromoteHalfRes_LOAD(SDNode *N);
+ SDValue SoftPromoteHalfRes_SELECT(SDNode *N);
+ SDValue SoftPromoteHalfRes_SELECT_CC(SDNode *N);
+ SDValue SoftPromoteHalfRes_UnaryOp(SDNode *N);
+ SDValue SoftPromoteHalfRes_XINT_TO_FP(SDNode *N);
+ SDValue SoftPromoteHalfRes_UNDEF(SDNode *N);
+
+ bool SoftPromoteHalfOperand(SDNode *N, unsigned OpNo);
+ SDValue SoftPromoteHalfOp_BITCAST(SDNode *N);
+ SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
+ SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N);
+ SDValue SoftPromoteHalfOp_FP_TO_XINT(SDNode *N);
+ SDValue SoftPromoteHalfOp_SETCC(SDNode *N);
+ SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo);
+ SDValue SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
// Scalarization Support: LegalizeVectorTypes.cpp
//===--------------------------------------------------------------------===//
@@ -721,6 +776,11 @@ private:
void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi);
void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
+ // Helper function for incrementing the pointer when splitting
+ // memory operations
+ void IncrementPointer(MemSDNode *N, EVT MemVT,
+ MachinePointerInfo &MPI, SDValue &Ptr);
+
// Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
void SplitVectorResult(SDNode *N, unsigned ResNo);
void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -918,6 +978,7 @@ private:
void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVSETCC(const SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index c45c62cabc05b..9cd3b8f76d6ca 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -50,6 +50,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
case TargetLowering::TypePromoteInteger:
break;
case TargetLowering::TypePromoteFloat:
+ case TargetLowering::TypeSoftPromoteHalf:
llvm_unreachable("Bitcast of a promotion-needing float should never need"
"expansion");
case TargetLowering::TypeSoftenFloat:
@@ -82,6 +83,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
+ case TargetLowering::TypeScalarizeScalableVector:
+ report_fatal_error("Scalarization of scalable vectors is not supported.");
case TargetLowering::TypeWidenVector: {
assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");
InOp = GetWidenedVector(InOp);
@@ -119,9 +122,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
SmallVector<SDValue, 8> Vals;
for (unsigned i = 0; i < NumElems; ++i)
- Vals.push_back(DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, CastInOp,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT,
+ CastInOp, DAG.getVectorIdxConstant(i, dl)));
// Build Lo, Hi pair by pairing extracted elements if needed.
unsigned Slot = 0;
@@ -154,9 +156,13 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Create the stack frame object. Make sure it is aligned for both
// the source and expanded destination types.
- unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(
- NOutVT.getTypeForEVT(*DAG.getContext()));
- SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
+
+  // In cases where the vector is illegal, it will be broken down into parts
+  // and stored in parts; we should use the alignment of the smallest part.
+ Align InAlign = DAG.getReducedAlign(InVT, /*UseABI=*/false);
+ Align NOutAlign = DAG.getReducedAlign(NOutVT, /*UseABI=*/false);
+ Align Align = std::max(InAlign, NOutAlign);
+ SDValue StackPtr = DAG.CreateStackTemporary(InVT.getStoreSize(), Align);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
MachinePointerInfo PtrInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
@@ -165,7 +171,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo);
// Load the first half from the stack slot.
- Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo);
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, NOutAlign);
// Increment the pointer to the other half.
unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
@@ -173,8 +179,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Load the second half from the stack slot.
Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
- PtrInfo.getWithOffset(IncrementSize),
- MinAlign(Alignment, IncrementSize));
+ PtrInfo.getWithOffset(IncrementSize), NOutAlign);
// Handle endianness of the load.
if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout()))
@@ -251,21 +256,20 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT);
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
- unsigned Alignment = LD->getAlignment();
AAMDNodes AAInfo = LD->getAAInfo();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
- Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), Alignment,
- LD->getMemOperand()->getFlags(), AAInfo);
+ Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
+ AAInfo);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
- Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
- LD->getPointerInfo().getWithOffset(IncrementSize),
- MinAlign(Alignment, IncrementSize),
- LD->getMemOperand()->getFlags(), AAInfo);
+ Hi = DAG.getLoad(
+ NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize),
+ LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -462,7 +466,6 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT);
SDValue Chain = St->getChain();
SDValue Ptr = St->getBasePtr();
- unsigned Alignment = St->getAlignment();
AAMDNodes AAInfo = St->getAAInfo();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
@@ -474,14 +477,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
std::swap(Lo, Hi);
- Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), Alignment,
- St->getMemOperand()->getFlags(), AAInfo);
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
+ St->getOriginalAlign(), St->getMemOperand()->getFlags(),
+ AAInfo);
Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
- Hi = DAG.getStore(Chain, dl, Hi, Ptr,
- St->getPointerInfo().getWithOffset(IncrementSize),
- MinAlign(Alignment, IncrementSize),
- St->getMemOperand()->getFlags(), AAInfo);
+ Hi = DAG.getStore(
+ Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize),
+ St->getOriginalAlign(), St->getMemOperand()->getFlags(), AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -558,3 +561,12 @@ void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) {
Lo = DAG.getUNDEF(LoVT);
Hi = DAG.getUNDEF(HiVT);
}
+
+void DAGTypeLegalizer::SplitRes_FREEZE(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue L, H;
+ SDLoc dl(N);
+ GetSplitOp(N->getOperand(0), L, H);
+
+ Lo = DAG.getNode(ISD::FREEZE, dl, L.getValueType(), L);
+ Hi = DAG.getNode(ISD::FREEZE, dl, H.getValueType(), H);
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 7d0b1ee6ae073..6409f924920dd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -142,9 +142,10 @@ class VectorLegalizer {
void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
- SDValue ExpandFixedPointDiv(SDNode *Node);
+ void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
SDValue ExpandStrictFPOp(SDNode *Node);
void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
@@ -182,9 +183,7 @@ bool VectorLegalizer::Run() {
E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
// Check if the values of the nodes contain vectors. We don't need to check
// the operands because we are going to check their values at some point.
- for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
- J != E; ++J)
- HasVectors |= J->isVector();
+ HasVectors = llvm::any_of(I->values(), [](EVT T) { return T.isVector(); });
// If we found a vector node we can start the legalization.
if (HasVectors)
@@ -318,12 +317,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
}
}
- bool HasVectorValueOrOp = false;
- for (auto J = Node->value_begin(), E = Node->value_end(); J != E; ++J)
- HasVectorValueOrOp |= J->isVector();
- for (const SDValue &Oper : Node->op_values())
- HasVectorValueOrOp |= Oper.getValueType().isVector();
-
+ bool HasVectorValueOrOp =
+ llvm::any_of(Node->values(), [](EVT T) { return T.isVector(); }) ||
+ llvm::any_of(Node->op_values(),
+ [](SDValue O) { return O.getValueType().isVector(); });
if (!HasVectorValueOrOp)
return TranslateLegalizeResults(Op, Node);
@@ -339,7 +336,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
if (Action == TargetLowering::Legal)
Action = TargetLowering::Expand;
break;
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
ValVT = Node->getValueType(0);
@@ -431,6 +428,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FRINT:
case ISD::FNEARBYINT:
case ISD::FROUND:
+ case ISD::FROUNDEVEN:
case ISD::FFLOOR:
case ISD::FP_ROUND:
case ISD::FP_EXTEND:
@@ -463,7 +461,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
case ISD::SDIVFIX:
- case ISD::UDIVFIX: {
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
@@ -704,132 +704,7 @@ void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
-
- EVT SrcVT = LD->getMemoryVT();
- EVT SrcEltVT = SrcVT.getScalarType();
- unsigned NumElem = SrcVT.getVectorNumElements();
-
- SDValue NewChain;
- SDValue Value;
- if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
- SDLoc dl(N);
-
- SmallVector<SDValue, 8> Vals;
- SmallVector<SDValue, 8> LoadChains;
-
- EVT DstEltVT = LD->getValueType(0).getScalarType();
- SDValue Chain = LD->getChain();
- SDValue BasePTR = LD->getBasePtr();
- ISD::LoadExtType ExtType = LD->getExtensionType();
-
- // When elements in a vector is not byte-addressable, we cannot directly
- // load each element by advancing pointer, which could only address bytes.
- // Instead, we load all significant words, mask bits off, and concatenate
- // them to form each element. Finally, they are extended to destination
- // scalar type to build the destination vector.
- EVT WideVT = TLI.getPointerTy(DAG.getDataLayout());
-
- assert(WideVT.isRound() &&
- "Could not handle the sophisticated case when the widest integer is"
- " not power of 2.");
- assert(WideVT.bitsGE(SrcEltVT) &&
- "Type is not legalized?");
-
- unsigned WideBytes = WideVT.getStoreSize();
- unsigned Offset = 0;
- unsigned RemainingBytes = SrcVT.getStoreSize();
- SmallVector<SDValue, 8> LoadVals;
- while (RemainingBytes > 0) {
- SDValue ScalarLoad;
- unsigned LoadBytes = WideBytes;
-
- if (RemainingBytes >= LoadBytes) {
- ScalarLoad =
- DAG.getLoad(WideVT, dl, Chain, BasePTR,
- LD->getPointerInfo().getWithOffset(Offset),
- MinAlign(LD->getAlignment(), Offset),
- LD->getMemOperand()->getFlags(), LD->getAAInfo());
- } else {
- EVT LoadVT = WideVT;
- while (RemainingBytes < LoadBytes) {
- LoadBytes >>= 1; // Reduce the load size by half.
- LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
- }
- ScalarLoad =
- DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
- LD->getPointerInfo().getWithOffset(Offset), LoadVT,
- MinAlign(LD->getAlignment(), Offset),
- LD->getMemOperand()->getFlags(), LD->getAAInfo());
- }
-
- RemainingBytes -= LoadBytes;
- Offset += LoadBytes;
-
- BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes);
-
- LoadVals.push_back(ScalarLoad.getValue(0));
- LoadChains.push_back(ScalarLoad.getValue(1));
- }
-
- unsigned BitOffset = 0;
- unsigned WideIdx = 0;
- unsigned WideBits = WideVT.getSizeInBits();
-
- // Extract bits, pack and extend/trunc them into destination type.
- unsigned SrcEltBits = SrcEltVT.getSizeInBits();
- SDValue SrcEltBitMask = DAG.getConstant(
- APInt::getLowBitsSet(WideBits, SrcEltBits), dl, WideVT);
-
- for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
- assert(BitOffset < WideBits && "Unexpected offset!");
-
- SDValue ShAmt = DAG.getConstant(
- BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
- SDValue Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
-
- BitOffset += SrcEltBits;
- if (BitOffset >= WideBits) {
- WideIdx++;
- BitOffset -= WideBits;
- if (BitOffset > 0) {
- ShAmt = DAG.getConstant(
- SrcEltBits - BitOffset, dl,
- TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
- SDValue Hi =
- DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
- Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
- }
- }
-
- Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
-
- switch (ExtType) {
- default: llvm_unreachable("Unknown extended-load op!");
- case ISD::EXTLOAD:
- Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
- break;
- case ISD::ZEXTLOAD:
- Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
- break;
- case ISD::SEXTLOAD:
- ShAmt =
- DAG.getConstant(WideBits - SrcEltBits, dl,
- TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
- Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
- Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
- Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
- break;
- }
- Vals.push_back(Lo);
- }
-
- NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
- Value = DAG.getBuildVector(N->getValueType(0), dl, Vals);
- } else {
- std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG);
- }
-
- return std::make_pair(Value, NewChain);
+ return TLI.scalarizeVectorLoad(LD, DAG);
}
SDValue VectorLegalizer::ExpandStore(SDNode *N) {
@@ -968,9 +843,12 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
break;
case ISD::SDIVFIX:
case ISD::UDIVFIX:
- Results.push_back(ExpandFixedPointDiv(Node));
+ ExpandFixedPointDiv(Node, Results);
return;
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIXSAT:
+ break;
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
ExpandStrictFPOp(Node, Results);
@@ -990,6 +868,10 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::VECREDUCE_FMIN:
Results.push_back(TLI.expandVecReduce(Node, DAG));
return;
+ case ISD::SREM:
+ case ISD::UREM:
+ ExpandREM(Node, Results);
+ return;
}
Results.push_back(DAG.UnrollVectorOp(Node));
@@ -1087,9 +969,8 @@ SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) {
NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
NumSrcElements);
- Src = DAG.getNode(
- ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT),
+ Src, DAG.getVectorIdxConstant(0, DL));
}
// Build a base mask of undef shuffles.
@@ -1147,9 +1028,8 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
NumSrcElements);
- Src = DAG.getNode(
- ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT),
+ Src, DAG.getVectorIdxConstant(0, DL));
}
// Build up a zero vector to blend into this one.
@@ -1456,12 +1336,12 @@ void VectorLegalizer::ExpandMULO(SDNode *Node,
Results.push_back(Overflow);
}
-SDValue VectorLegalizer::ExpandFixedPointDiv(SDNode *Node) {
+void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
SDNode *N = Node;
if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N),
N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG))
- return Expanded;
- return DAG.UnrollVectorOp(N);
+ Results.push_back(Expanded);
}
void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
@@ -1478,6 +1358,17 @@ void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
UnrollStrictFPOp(Node, Results);
}
+void VectorLegalizer::ExpandREM(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) &&
+ "Expected REM node");
+
+ SDValue Result;
+ if (!TLI.expandREM(Node, Result, DAG))
+ Result = DAG.UnrollVectorOp(Node);
+ Results.push_back(Result);
+}
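
TargetLowering::expandREM typically rewrites the remainder in terms of a divide when one is available; only when that also fails is the node unrolled element by element. The scalar identity being used, with C-style truncating division (a sketch, not the DAG construction):

```cpp
#include <cstdio>

int main() {
  int A = -7, B = 3;
  // srem as SUB(A, MUL(SDIV(A, B), B)); SDIV truncates toward zero.
  int Rem = A - (A / B) * B;
  std::printf("%d\n", Rem);   // prints -1, matching A % B
  return 0;
}
```
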
+
void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
EVT VT = Node->getValueType(0);
@@ -1500,8 +1391,7 @@ void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
SmallVector<SDValue, 32> OpChains;
for (unsigned i = 0; i < NumElems; ++i) {
SmallVector<SDValue, 4> Opers;
- SDValue Idx = DAG.getConstant(i, dl,
- TLI.getVectorIdxTy(DAG.getDataLayout()));
+ SDValue Idx = DAG.getVectorIdxConstant(i, dl);
// The Chain is the first operand.
Opers.push_back(Chain);
@@ -1551,12 +1441,10 @@ SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
SDLoc dl(Node);
SmallVector<SDValue, 8> Ops(NumElems);
for (unsigned i = 0; i < NumElems; ++i) {
- SDValue LHSElem = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- SDValue RHSElem = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getVectorIdxConstant(i, dl));
+ SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getVectorIdxConstant(i, dl));
Ops[i] = DAG.getNode(ISD::SETCC, dl,
TLI.getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), TmpEltVT),
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 974914d00d052..414ba25ffd5ff 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -20,10 +20,11 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TypeSize.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "legalize-types"
@@ -88,11 +89,13 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FLOG2:
case ISD::FNEARBYINT:
case ISD::FNEG:
+ case ISD::FREEZE:
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FRINT:
case ISD::FROUND:
+ case ISD::FROUNDEVEN:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
@@ -147,7 +150,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
R = ScalarizeVecRes_TernaryOp(N);
break;
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
R = ScalarizeVecRes_StrictFPOp(N);
@@ -166,7 +169,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
case ISD::SDIVFIX:
+ case ISD::SDIVFIXSAT:
case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT:
R = ScalarizeVecRes_FIX(N);
break;
}
@@ -187,8 +192,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
SDValue Op0 = GetScalarizedVector(N->getOperand(0));
SDValue Op1 = GetScalarizedVector(N->getOperand(1));
SDValue Op2 = GetScalarizedVector(N->getOperand(2));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- Op0.getValueType(), Op0, Op1, Op2);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1,
+ Op2, N->getFlags());
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) {
@@ -196,7 +201,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) {
SDValue Op1 = GetScalarizedVector(N->getOperand(1));
SDValue Op2 = N->getOperand(2);
return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1,
- Op2);
+ Op2, N->getFlags());
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
@@ -221,7 +226,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
Opers[i] = Oper;
}
- SDValue Result = DAG.getNode(N->getOpcode(), dl, ValueVTs, Opers);
+ SDValue Result = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(ValueVTs),
+ Opers, N->getFlags());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
@@ -251,6 +257,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_OverflowOp(SDNode *N,
ResVT.getVectorElementType(), OvVT.getVectorElementType());
SDNode *ScalarNode = DAG.getNode(
N->getOpcode(), DL, ScalarVTs, ScalarLHS, ScalarRHS).getNode();
+ ScalarNode->setFlags(N->getFlags());
// Replace the other vector result not being explicitly scalarized here.
unsigned OtherNo = 1 - ResNo;
@@ -331,8 +338,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
N->getValueType(0).getVectorElementType(), SDLoc(N), N->getChain(),
N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()),
N->getPointerInfo(), N->getMemoryVT().getVectorElementType(),
- N->getOriginalAlignment(), N->getMemOperand()->getFlags(),
- N->getAAInfo());
+ N->getOriginalAlign(), N->getMemOperand()->getFlags(), N->getAAInfo());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
@@ -357,11 +363,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
Op = GetScalarizedVector(Op);
} else {
EVT VT = OpVT.getVectorElementType();
- Op = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
+ DAG.getVectorIdxConstant(0, DL));
}
- return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op, N->getFlags());
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
@@ -383,9 +388,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VecInregOp(SDNode *N) {
if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
Op = GetScalarizedVector(Op);
} else {
- Op = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, OpEltVT, Op,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpEltVT, Op,
+ DAG.getVectorIdxConstant(0, DL));
}
switch (N->getOpcode()) {
@@ -421,9 +425,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
Cond = GetScalarizedVector(Cond);
} else {
EVT VT = OpVT.getVectorElementType();
- Cond = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Cond = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond,
+ DAG.getVectorIdxConstant(0, DL));
}
SDValue LHS = GetScalarizedVector(N->getOperand(1));
@@ -523,12 +526,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
RHS = GetScalarizedVector(RHS);
} else {
EVT VT = OpVT.getVectorElementType();
- LHS = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
- RHS = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS,
+ DAG.getVectorIdxConstant(0, DL));
+ RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS,
+ DAG.getVectorIdxConstant(0, DL));
}
// Turn it into a scalar SETCC.
@@ -749,12 +750,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
return DAG.getTruncStore(
N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
N->getBasePtr(), N->getPointerInfo(),
- N->getMemoryVT().getVectorElementType(), N->getAlignment(),
+ N->getMemoryVT().getVectorElementType(), N->getOriginalAlign(),
N->getMemOperand()->getFlags(), N->getAAInfo());
return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
N->getBasePtr(), N->getPointerInfo(),
- N->getOriginalAlignment(), N->getMemOperand()->getFlags(),
+ N->getOriginalAlign(), N->getMemOperand()->getFlags(),
N->getAAInfo());
}
@@ -881,12 +882,14 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FLOG2:
case ISD::FNEARBYINT:
case ISD::FNEG:
+ case ISD::FREEZE:
case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FRINT:
case ISD::FROUND:
+ case ISD::FROUNDEVEN:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
@@ -942,7 +945,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_TernaryOp(N, Lo, Hi);
break;
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
SplitVecRes_StrictFPOp(N, Lo, Hi);
@@ -961,7 +964,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
case ISD::SDIVFIX:
+ case ISD::SDIVFIXSAT:
case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT:
SplitVecRes_FIX(N, Lo, Hi);
break;
}
@@ -971,6 +976,25 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SetSplitVector(SDValue(N, ResNo), Lo, Hi);
}
+void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
+ MachinePointerInfo &MPI,
+ SDValue &Ptr) {
+ SDLoc DL(N);
+ unsigned IncrementSize = MemVT.getSizeInBits().getKnownMinSize() / 8;
+
+ if (MemVT.isScalableVector()) {
+ SDValue BytesIncrement = DAG.getVScale(
+ DL, Ptr.getValueType(),
+ APInt(Ptr.getValueSizeInBits().getFixedSize(), IncrementSize));
+ MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement);
+ } else {
+ MPI = N->getPointerInfo().getWithOffset(IncrementSize);
+ // Increment the pointer to the other half.
+ Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize);
+ }
+}
+
void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue LHSLo, LHSHi;
@@ -995,10 +1019,10 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi);
SDLoc dl(N);
- Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(),
- Op0Lo, Op1Lo, Op2Lo);
- Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(),
- Op0Hi, Op1Hi, Op2Hi);
+ Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), Op0Lo, Op1Lo,
+ Op2Lo, N->getFlags());
+ Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), Op0Hi, Op1Hi,
+ Op2Hi, N->getFlags());
}
void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) {
@@ -1010,8 +1034,10 @@ void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue Op2 = N->getOperand(2);
unsigned Opcode = N->getOpcode();
- Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2);
- Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2);
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2,
+ N->getFlags());
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2,
+ N->getFlags());
}
void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
@@ -1030,6 +1056,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
case TargetLowering::TypeLegal:
case TargetLowering::TypePromoteInteger:
case TargetLowering::TypePromoteFloat:
+ case TargetLowering::TypeSoftPromoteHalf:
case TargetLowering::TypeSoftenFloat:
case TargetLowering::TypeScalarizeVector:
case TargetLowering::TypeWidenVector:
@@ -1055,6 +1082,8 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
return;
+ case TargetLowering::TypeScalarizeScalableVector:
+ report_fatal_error("Scalarization of scalable vectors is not supported.");
}
// In the general case, convert the input to an integer and split it by hand.
@@ -1116,9 +1145,9 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
- DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), dl,
- TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Hi = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
+ DAG.getVectorIdxConstant(IdxVal + LoVT.getVectorNumElements(), dl));
}
void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
@@ -1137,40 +1166,45 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
// boundary between the halves, we can avoid spilling the vector, and insert
// into the lower half of the split vector directly.
// TODO: The IdxVal == 0 constraint is artificial, we could do this whenever
- // the index is constant and there is no boundary crossing. But those cases
- // don't seem to get hit in practice.
- if (ConstantSDNode *ConstIdx = dyn_cast<ConstantSDNode>(Idx)) {
- unsigned IdxVal = ConstIdx->getZExtValue();
- if ((IdxVal == 0) && (IdxVal + SubElems <= VecElems / 2)) {
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
- Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx);
- return;
- }
+ // there is no boundary crossing. But those cases don't seem to get hit in
+ // practice.
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if ((IdxVal == 0) && (IdxVal + SubElems <= VecElems / 2)) {
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx);
+ return;
}
// Spill the vector to the stack.
- SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
- SDValue Store =
- DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
+ // In cases where the vector is illegal it will be broken down into parts
+ // and stored in parts - we should use the alignment for the smallest part.
+ Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
+ SDValue StackPtr =
+ DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
+ auto &MF = DAG.getMachineFunction();
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+ SmallestAlign);
// Store the new subvector into the specified index.
SDValue SubVecPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
- Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
- unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
- Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo());
+ Store = DAG.getStore(Store, dl, SubVec, SubVecPtr,
+ MachinePointerInfo::getUnknownStack(MF));
// Load the Lo part from the stack slot.
- Lo =
- DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo());
+ Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, PtrInfo,
+ SmallestAlign);
// Increment the pointer to the other part.
unsigned IncrementSize = Lo.getValueSizeInBits() / 8;
StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl);
// Load the Hi part from the stack slot.
- Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
- MinAlign(Alignment, IncrementSize));
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr,
+ PtrInfo.getWithOffset(IncrementSize), SmallestAlign);
}
void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
@@ -1291,8 +1325,10 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
EVT LoValueVTs[] = {LoVT, MVT::Other};
EVT HiValueVTs[] = {HiVT, MVT::Other};
- Lo = DAG.getNode(N->getOpcode(), dl, LoValueVTs, OpsLo);
- Hi = DAG.getNode(N->getOpcode(), dl, HiValueVTs, OpsHi);
+ Lo = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(LoValueVTs), OpsLo,
+ N->getFlags());
+ Hi = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(HiValueVTs), OpsHi,
+ N->getFlags());
// Build a factor node to remember that this Op is independent of the
// other one.
@@ -1332,10 +1368,8 @@ SDValue DAGTypeLegalizer::UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE) {
EVT OperandVT = Operand.getValueType();
if (OperandVT.isVector()) {
EVT OperandEltVT = OperandVT.getVectorElementType();
- Operands[j] =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(
- DAG.getDataLayout())));
+ Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT,
+ Operand, DAG.getVectorIdxConstant(i, dl));
} else {
Operands[j] = Operand;
}
@@ -1384,6 +1418,8 @@ void DAGTypeLegalizer::SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo,
SDVTList HiVTs = DAG.getVTList(HiResVT, HiOvVT);
SDNode *LoNode = DAG.getNode(Opcode, dl, LoVTs, LoLHS, LoRHS).getNode();
SDNode *HiNode = DAG.getNode(Opcode, dl, HiVTs, HiLHS, HiRHS).getNode();
+ LoNode->setFlags(N->getFlags());
+ HiNode->setFlags(N->getFlags());
Lo = SDValue(LoNode, ResNo);
Hi = SDValue(HiNode, ResNo);
@@ -1417,10 +1453,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
Lo.getValueType(), Lo, Elt, Idx);
else
- Hi =
- DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
- DAG.getConstant(IdxVal - LoNumElts, dl,
- TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
+ DAG.getVectorIdxConstant(IdxVal - LoNumElts, dl));
return;
}
@@ -1442,36 +1476,38 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
}
// Spill the vector to the stack.
- SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ // In cases where the vector is illegal it will be broken down into parts
+ // and stored in parts - we should use the alignment for the smallest part.
+ Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
+ SDValue StackPtr =
+ DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
auto &MF = DAG.getMachineFunction();
auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
+
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+ SmallestAlign);
// Store the new element. This may be larger than the vector element type,
// so use a truncating store.
SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
- Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
- unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
- Store = DAG.getTruncStore(Store, dl, Elt, EltPtr,
- MachinePointerInfo::getUnknownStack(MF), EltVT);
+ Store = DAG.getTruncStore(
+ Store, dl, Elt, EltPtr, MachinePointerInfo::getUnknownStack(MF), EltVT,
+ commonAlignment(SmallestAlign, EltVT.getSizeInBits() / 8));
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
// Load the Lo part from the stack slot.
- Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo);
+ Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo, SmallestAlign);
// Increment the pointer to the other part.
unsigned IncrementSize = LoVT.getSizeInBits() / 8;
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- DAG.getConstant(IncrementSize, dl,
- StackPtr.getValueType()));
+ StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl);
// Load the Hi part from the stack slot.
Hi = DAG.getLoad(HiVT, dl, Store, StackPtr,
- PtrInfo.getWithOffset(IncrementSize),
- MinAlign(Alignment, IncrementSize));
+ PtrInfo.getWithOffset(IncrementSize), SmallestAlign);
// If we adjusted the original type, we need to truncate the results.
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
@@ -1502,21 +1538,29 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
SDValue Ptr = LD->getBasePtr();
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
EVT MemoryVT = LD->getMemoryVT();
- unsigned Alignment = LD->getOriginalAlignment();
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+ if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) {
+ SDValue Value, NewChain;
+ std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG);
+ std::tie(Lo, Hi) = DAG.SplitVector(Value, dl);
+ ReplaceValueWith(SDValue(LD, 1), NewChain);
+ return;
+ }
+
Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
- LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo);
+ LD->getPointerInfo(), LoMemVT, LD->getOriginalAlign(),
+ MMOFlags, AAInfo);
+
+ MachinePointerInfo MPI;
+ IncrementPointer(LD, LoMemVT, MPI, Ptr);
- unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
- Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
- LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT,
- Alignment, MMOFlags, AAInfo);
+ Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, MPI,
+ HiMemVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -1541,7 +1585,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
assert(Offset.isUndef() && "Unexpected indexed masked load offset");
SDValue Mask = MLD->getMask();
SDValue PassThru = MLD->getPassThru();
- unsigned Alignment = MLD->getOriginalAlignment();
+ Align Alignment = MLD->getOriginalAlign();
ISD::LoadExtType ExtType = MLD->getExtensionType();
// Split Mask operand
@@ -1557,7 +1601,9 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
EVT MemoryVT = MLD->getMemoryVT();
EVT LoMemVT, HiMemVT;
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+ bool HiIsEmpty = false;
+ std::tie(LoMemVT, HiMemVT) =
+ DAG.GetDependentSplitDestVTs(MemoryVT, LoVT, &HiIsEmpty);
SDValue PassThruLo, PassThruHi;
if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
@@ -1565,27 +1611,33 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
else
std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MLD->getPointerInfo(),
- MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
- Alignment, MLD->getAAInfo(), MLD->getRanges());
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
+ Alignment, MLD->getAAInfo(), MLD->getRanges());
Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT,
MMO, MLD->getAddressingMode(), ExtType,
MLD->isExpandingLoad());
- Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
- MLD->isExpandingLoad());
- unsigned HiOffset = LoMemVT.getStoreSize();
-
- MMO = DAG.getMachineFunction().getMachineMemOperand(
- MLD->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOLoad,
- HiMemVT.getStoreSize(), Alignment, MLD->getAAInfo(),
- MLD->getRanges());
-
- Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, HiMemVT,
- MMO, MLD->getAddressingMode(), ExtType,
- MLD->isExpandingLoad());
+ if (HiIsEmpty) {
+ // The hi masked load has zero storage size. We therefore simply set it to
+ // the low masked load and rely on subsequent removal from the chain.
+ Hi = Lo;
+ } else {
+ // Generate hi masked load.
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
+ MLD->isExpandingLoad());
+ unsigned HiOffset = LoMemVT.getStoreSize();
+
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MLD->getPointerInfo().getWithOffset(HiOffset),
+ MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment,
+ MLD->getAAInfo(), MLD->getRanges());
+
+ Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi,
+ HiMemVT, MMO, MLD->getAddressingMode(), ExtType,
+ MLD->isExpandingLoad());
+ }
// Build a factor node to remember that this load is independent of the
// other one.
@@ -1610,7 +1662,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
SDValue PassThru = MGT->getPassThru();
SDValue Index = MGT->getIndex();
SDValue Scale = MGT->getScale();
- unsigned Alignment = MGT->getOriginalAlignment();
+ Align Alignment = MGT->getOriginalAlign();
// Split Mask operand
SDValue MaskLo, MaskHi;
@@ -1623,11 +1675,6 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
}
- EVT MemoryVT = MGT->getMemoryVT();
- EVT LoMemVT, HiMemVT;
- // Split MemoryVT
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
SDValue PassThruLo, PassThruHi;
if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(PassThru, PassThruLo, PassThruHi);
@@ -1640,10 +1687,10 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
else
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MGT->getPointerInfo(),
- MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
- Alignment, MGT->getAAInfo(), MGT->getRanges());
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MGT->getPointerInfo(), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, Alignment, MGT->getAAInfo(),
+ MGT->getRanges());
SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo,
@@ -1708,11 +1755,13 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, OpNo);
if (N->getOpcode() == ISD::FP_ROUND) {
- Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
- Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1),
+ N->getFlags());
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1),
+ N->getFlags());
} else {
- Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
- Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getFlags());
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getFlags());
}
}
@@ -1737,8 +1786,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
// more effectively move in the right direction and prevent falling down
// to scalarization in many cases due to the input vector being split too
// far.
- unsigned NumElements = SrcVT.getVectorNumElements();
- if ((NumElements & 1) == 0 &&
+ if ((SrcVT.getVectorMinNumElements() & 1) == 0 &&
SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) {
LLVMContext &Ctx = *DAG.getContext();
EVT NewSrcVT = SrcVT.widenIntegerVectorElementType(Ctx);
@@ -1851,9 +1899,9 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
Idx -= Input * NewElts;
// Extract the vector element by hand.
- SVOps.push_back(DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Inputs[Input],
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Inputs[Input],
+ DAG.getVectorIdxConstant(Idx, dl)));
}
// Construct the Lo/Hi output using a BUILD_VECTOR.
@@ -1882,11 +1930,11 @@ void DAGTypeLegalizer::SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue SV = N->getOperand(2);
SDLoc dl(N);
- const unsigned Alignment = DAG.getDataLayout().getABITypeAlignment(
- NVT.getTypeForEVT(*DAG.getContext()));
+ const Align Alignment =
+ DAG.getDataLayout().getABITypeAlign(NVT.getTypeForEVT(*DAG.getContext()));
- Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, SV, Alignment);
- Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, SV, Alignment);
+ Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, SV, Alignment.value());
+ Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, SV, Alignment.value());
Chain = Hi.getValue(1);
// Modified the chain - switch anything that used the old chain to use
@@ -2160,8 +2208,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
} else {
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi,
- DAG.getConstant(IdxVal - LoElts, dl,
- Idx.getValueType()));
+ DAG.getVectorIdxConstant(IdxVal - LoElts, dl));
}
}
@@ -2200,11 +2247,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
}
// Store the vector to the stack.
- SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ // In cases where the vector is illegal it will be broken down into parts
+ // and stored in parts - we should use the alignment for the smallest part.
+ Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
+ SDValue StackPtr =
+ DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
auto &MF = DAG.getMachineFunction();
auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+ SmallestAlign);
// Load back the required element.
StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
@@ -2219,7 +2271,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
return DAG.getExtLoad(
ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
- MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT);
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT,
+ commonAlignment(SmallestAlign, EltVT.getSizeInBits() / 8));
}
SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) {
@@ -2244,7 +2297,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
SDValue Scale = MGT->getScale();
SDValue Mask = MGT->getMask();
SDValue PassThru = MGT->getPassThru();
- unsigned Alignment = MGT->getOriginalAlignment();
+ Align Alignment = MGT->getOriginalAlign();
SDValue MaskLo, MaskHi;
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
@@ -2269,21 +2322,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
else
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MGT->getPointerInfo(),
- MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
- Alignment, MGT->getAAInfo(), MGT->getRanges());
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MGT->getPointerInfo(), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, Alignment, MGT->getAAInfo(),
+ MGT->getRanges());
SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl,
OpsLo, MMO, MGT->getIndexType());
- MMO = DAG.getMachineFunction().
- getMachineMemOperand(MGT->getPointerInfo(),
- MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
- Alignment, MGT->getAAInfo(),
- MGT->getRanges());
-
SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl,
OpsHi, MMO, MGT->getIndexType());
@@ -2312,13 +2359,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
assert(Offset.isUndef() && "Unexpected indexed masked store offset");
SDValue Mask = N->getMask();
SDValue Data = N->getValue();
- EVT MemoryVT = N->getMemoryVT();
- unsigned Alignment = N->getOriginalAlignment();
+ Align Alignment = N->getOriginalAlign();
SDLoc DL(N);
- EVT LoMemVT, HiMemVT;
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
SDValue DataLo, DataHi;
if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
// Split Data operand
@@ -2337,32 +2380,45 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
}
- SDValue Lo, Hi;
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(N->getPointerInfo(),
- MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
- Alignment, N->getAAInfo(), N->getRanges());
+ EVT MemoryVT = N->getMemoryVT();
+ EVT LoMemVT, HiMemVT;
+ bool HiIsEmpty = false;
+ std::tie(LoMemVT, HiMemVT) =
+ DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty);
+
+ SDValue Lo, Hi, Res;
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ N->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
+ Alignment, N->getAAInfo(), N->getRanges());
Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO,
N->getAddressingMode(), N->isTruncatingStore(),
N->isCompressingStore());
- Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
- N->isCompressingStore());
- unsigned HiOffset = LoMemVT.getStoreSize();
+ if (HiIsEmpty) {
+ // The hi masked store has zero storage size.
+ // Only the lo masked store is needed.
+ Res = Lo;
+ } else {
- MMO = DAG.getMachineFunction().getMachineMemOperand(
- N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore,
- HiMemVT.getStoreSize(), Alignment, N->getAAInfo(),
- N->getRanges());
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
+ N->isCompressingStore());
+ unsigned HiOffset = LoMemVT.getStoreSize();
- Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO,
- N->getAddressingMode(), N->isTruncatingStore(),
- N->isCompressingStore());
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore,
+ HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges());
- // Build a factor node to remember that this store is independent of the
- // other one.
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+ Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO,
+ N->getAddressingMode(), N->isTruncatingStore(),
+ N->isCompressingStore());
+
+ // Build a factor node to remember that this store is independent of the
+ // other one.
+ Res = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+ }
+
+ return Res;
}
SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
@@ -2373,13 +2429,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
SDValue Index = N->getIndex();
SDValue Scale = N->getScale();
SDValue Data = N->getValue();
- EVT MemoryVT = N->getMemoryVT();
- unsigned Alignment = N->getOriginalAlignment();
+ Align Alignment = N->getOriginalAlign();
SDLoc DL(N);
// Split all operands
- EVT LoMemVT, HiMemVT;
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
SDValue DataLo, DataHi;
if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
@@ -2406,20 +2459,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
SDValue Lo;
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(N->getPointerInfo(),
- MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
- Alignment, N->getAAInfo(), N->getRanges());
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ N->getPointerInfo(), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale};
Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
DL, OpsLo, MMO, N->getIndexType());
- MMO = DAG.getMachineFunction().
- getMachineMemOperand(N->getPointerInfo(),
- MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
- Alignment, N->getAAInfo(), N->getRanges());
-
// The order of the Scatter operation after split is well defined. The "Hi"
// part comes after the "Lo". So these two operations should be chained one
// after another.
@@ -2437,7 +2484,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
EVT MemoryVT = N->getMemoryVT();
- unsigned Alignment = N->getOriginalAlignment();
+ Align Alignment = N->getOriginalAlign();
MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
AAMDNodes AAInfo = N->getAAInfo();
SDValue Lo, Hi;
@@ -2450,8 +2497,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized())
return TLI.scalarizeVectorStore(N, DAG);
- unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
-
if (isTruncating)
Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT,
Alignment, MMOFlags, AAInfo);
@@ -2459,17 +2504,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags,
AAInfo);
- // Increment the pointer to the other half.
- Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize);
+ MachinePointerInfo MPI;
+ IncrementPointer(N, LoMemVT, MPI, Ptr);
if (isTruncating)
- Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
- N->getPointerInfo().getWithOffset(IncrementSize),
+ Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, MPI,
HiMemVT, Alignment, MMOFlags, AAInfo);
else
- Hi = DAG.getStore(Ch, DL, Hi, Ptr,
- N->getPointerInfo().getWithOffset(IncrementSize),
- Alignment, MMOFlags, AAInfo);
+ Hi = DAG.getStore(Ch, DL, Hi, Ptr, MPI, Alignment, MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
@@ -2487,9 +2529,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
for (const SDValue &Op : N->op_values()) {
for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
i != e; ++i) {
- Elts.push_back(DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op,
- DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op,
+ DAG.getVectorIdxConstant(i, DL)));
}
}
@@ -2565,9 +2606,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
SDValue Chain;
if (N->isStrictFPOpcode()) {
HalfLo = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
- {N->getOperand(0), HalfLo});
+ {N->getOperand(0), InLoVec});
HalfHi = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
- {N->getOperand(0), HalfHi});
+ {N->getOperand(0), InHiVec});
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, HalfLo.getValue(1),
@@ -2611,9 +2652,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
SDLoc DL(N);
GetSplitVector(N->getOperand(0), Lo0, Hi0);
GetSplitVector(N->getOperand(1), Lo1, Hi1);
- unsigned PartElements = Lo0.getValueType().getVectorNumElements();
- EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements);
- EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements);
+ auto PartEltCnt = Lo0.getValueType().getVectorElementCount();
+
+ LLVMContext &Context = *DAG.getContext();
+ EVT PartResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt);
+ EVT WideResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt*2);
LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
@@ -2753,7 +2796,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_BinaryWithExtraScalarOp(N);
break;
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
Res = WidenVecRes_StrictFP(N);
@@ -2813,6 +2856,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FNEARBYINT:
case ISD::FRINT:
case ISD::FROUND:
+ case ISD::FROUNDEVEN:
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC: {
@@ -2842,6 +2886,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::FNEG:
+ case ISD::FREEZE:
case ISD::FCANONICALIZE:
Res = WidenVecRes_Unary(N);
break;
@@ -2924,9 +2969,8 @@ static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI,
SDValue VecOp = DAG.getUNDEF(NextVT);
unsigned NumToInsert = ConcatEnd - Idx - 1;
for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
- VecOp = DAG.getNode(
- ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx],
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
+ ConcatOps[OpIdx], DAG.getVectorIdxConstant(i, dl));
}
ConcatOps[Idx+1] = VecOp;
ConcatEnd = Idx + 2;
@@ -3008,12 +3052,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
// }
while (CurNumElts != 0) {
while (CurNumElts >= NumElts) {
- SDValue EOp1 = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- SDValue EOp2 = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
+ DAG.getVectorIdxConstant(Idx, dl));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
+ DAG.getVectorIdxConstant(Idx, dl));
ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags);
Idx += NumElts;
CurNumElts -= NumElts;
@@ -3025,12 +3067,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
if (NumElts == 1) {
for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
- SDValue EOp1 = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp1,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- SDValue EOp2 = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp1, DAG.getVectorIdxConstant(Idx, dl));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp2, DAG.getVectorIdxConstant(Idx, dl));
ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
EOp1, EOp2, Flags);
}
@@ -3108,14 +3148,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
while (CurNumElts != 0) {
while (CurNumElts >= NumElts) {
SmallVector<SDValue, 4> EOps;
-
+
for (unsigned i = 0; i < NumOpers; ++i) {
SDValue Op = InOps[i];
-
- if (Op.getValueType().isVector())
- Op = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, VT, Op,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ if (Op.getValueType().isVector())
+ Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Op,
+ DAG.getVectorIdxConstant(Idx, dl));
EOps.push_back(Op);
}
@@ -3140,10 +3179,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
SDValue Op = InOps[i];
if (Op.getValueType().isVector())
- Op = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, Op,
- DAG.getConstant(Idx, dl,
- TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, Op,
+ DAG.getVectorIdxConstant(Idx, dl));
EOps.push_back(Op);
}
@@ -3190,8 +3227,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) {
*DAG.getContext(), ResVT.getVectorElementType(),
WideOvVT.getVectorNumElements());
- SDValue Zero = DAG.getConstant(
- 0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
+ SDValue Zero = DAG.getVectorIdxConstant(0, DL);
WideLHS = DAG.getNode(
ISD::INSERT_SUBVECTOR, DL, WideResVT, DAG.getUNDEF(WideResVT),
N->getOperand(0), Zero);
@@ -3210,8 +3246,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) {
if (getTypeAction(OtherVT) == TargetLowering::TypeWidenVector) {
SetWidenedVector(SDValue(N, OtherNo), SDValue(WideNode, OtherNo));
} else {
- SDValue Zero = DAG.getConstant(
- 0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
+ SDValue Zero = DAG.getVectorIdxConstant(0, DL);
SDValue OtherVal = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, DL, OtherVT, SDValue(WideNode, OtherNo), Zero);
ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
@@ -3274,9 +3309,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
}
if (InVTNumElts % WidenNumElts == 0) {
- SDValue InVal = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp,
+ DAG.getVectorIdxConstant(0, DL));
// Extract the input and convert the shortened input vector.
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVal);
@@ -3291,9 +3325,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// necessary.
unsigned MinElts = N->getValueType(0).getVectorNumElements();
for (unsigned i=0; i < MinElts; ++i) {
- SDValue Val = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
- DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getVectorIdxConstant(i, DL));
if (N->getNumOperands() == 1)
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
else
@@ -3310,7 +3343,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
- SmallVector<EVT, 2> WidenVTs = { WidenVT, MVT::Other };
EVT InVT = InOp.getValueType();
EVT InEltVT = InVT.getVectorElementType();
@@ -3321,16 +3353,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
// Otherwise unroll into some nasty scalar code and rebuild the vector.
EVT EltVT = WidenVT.getVectorElementType();
- SmallVector<EVT, 2> EltVTs = { EltVT, MVT::Other };
+ std::array<EVT, 2> EltVTs = {{EltVT, MVT::Other}};
SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
SmallVector<SDValue, 32> OpChains;
// Use the original element count so we don't do more scalar opts than
// necessary.
unsigned MinElts = N->getValueType(0).getVectorNumElements();
for (unsigned i=0; i < MinElts; ++i) {
- NewOps[1] = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
- DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ NewOps[1] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getVectorIdxConstant(i, DL));
Ops[i] = DAG.getNode(Opcode, DL, EltVTs, NewOps);
OpChains.push_back(Ops[i].getValue(1));
}
@@ -3370,7 +3401,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
SmallVector<SDValue, 16> Ops;
for (unsigned i = 0, e = std::min(InVTNumElts, WidenNumElts); i != e; ++i) {
SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InSVT, InOp,
- DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ DAG.getVectorIdxConstant(i, DL));
switch (Opcode) {
case ISD::ANY_EXTEND_VECTOR_INREG:
Val = DAG.getNode(ISD::ANY_EXTEND, DL, WidenSVT, Val);
@@ -3463,6 +3494,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
switch (getTypeAction(InVT)) {
case TargetLowering::TypeLegal:
break;
+ case TargetLowering::TypeScalarizeScalableVector:
+ report_fatal_error("Scalarization of scalable vectors is not supported.");
case TargetLowering::TypePromoteInteger: {
// If the incoming type is a vector that is being promoted, then
// we know that the elements are arranged differently and that we
@@ -3492,6 +3525,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
}
case TargetLowering::TypeSoftenFloat:
case TargetLowering::TypePromoteFloat:
+ case TargetLowering::TypeSoftPromoteHalf:
case TargetLowering::TypeExpandInteger:
case TargetLowering::TypeExpandFloat:
case TargetLowering::TypeScalarizeVector:
@@ -3626,10 +3660,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
SDValue InOp = N->getOperand(i);
if (InputWidened)
InOp = GetWidenedVector(InOp);
- for (unsigned j=0; j < NumInElts; ++j)
- Ops[Idx++] = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ for (unsigned j = 0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getVectorIdxConstant(j, dl));
}
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; Idx < WidenNumElts; ++Idx)
@@ -3666,11 +3699,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
EVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
unsigned i;
- for (i=0; i < NumElts; ++i)
- Ops[i] =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getConstant(IdxVal + i, dl,
- TLI.getVectorIdxTy(DAG.getDataLayout())));
+ for (i = 0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getVectorIdxConstant(IdxVal + i, dl));
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i < WidenNumElts; ++i)
@@ -3689,6 +3720,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::LoadExtType ExtType = LD->getExtensionType();
+ // A vector must always be stored in memory as-is, i.e. without any padding
+ // between the elements, since various code depends on it, e.g. in the
+ // handling of a bitcast of a vector type to int, which may be done with a
+ // vector store followed by an integer load. A vector that does not have
+ // elements that are byte-sized must therefore be stored as an integer
+ // built out of the extracted vector elements.
+ if (!LD->getMemoryVT().isByteSized()) {
+ SDValue Value, NewChain;
+ std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG);
+ ReplaceValueWith(SDValue(LD, 0), Value);
+ ReplaceValueWith(SDValue(LD, 1), NewChain);
+ return SDValue();
+ }
+
SDValue Result;
SmallVector<SDValue, 16> LdChain; // Chain for the series of loads
if (ExtType != ISD::NON_EXTLOAD)
@@ -3877,8 +3922,7 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
// Adjust Mask to the right number of elements.
unsigned CurrMaskNumEls = Mask->getValueType(0).getVectorNumElements();
if (CurrMaskNumEls > ToMaskVT.getVectorNumElements()) {
- MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
- SDValue ZeroIdx = DAG.getConstant(0, SDLoc(Mask), IdxTy);
+ SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(Mask));
Mask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Mask), ToMaskVT, Mask,
ZeroIdx);
} else if (CurrMaskNumEls < ToMaskVT.getVectorNumElements()) {
@@ -4144,12 +4188,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) {
SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getUNDEF(EltVT));
SmallVector<SDValue, 8> Chains(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
- SDValue LHSElem = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- SDValue RHSElem = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getVectorIdxConstant(i, dl));
+ SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getVectorIdxConstant(i, dl));
Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other},
{Chain, LHSElem, RHSElem, CC});
@@ -4288,13 +4330,12 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() &&
"We can't have the same type as we started with!");
if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements())
- InOp = DAG.getNode(
- ISD::INSERT_SUBVECTOR, DL, FixedVT, DAG.getUNDEF(FixedVT), InOp,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT,
+ DAG.getUNDEF(FixedVT), InOp,
+ DAG.getVectorIdxConstant(0, DL));
else
- InOp = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp,
+ DAG.getVectorIdxConstant(0, DL));
break;
}
}
@@ -4363,9 +4404,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
else
Res = DAG.getNode(Opcode, dl, WideVT, InOp);
}
- return DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
+ DAG.getVectorIdxConstant(0, dl));
}
EVT InEltVT = InVT.getVectorElementType();
@@ -4376,9 +4416,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
SmallVector<SDValue, 32> OpChains;
for (unsigned i=0; i < NumElts; ++i) {
- NewOps[1] = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ NewOps[1] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getVectorIdxConstant(i, dl));
Ops[i] = DAG.getNode(Opcode, dl, { EltVT, MVT::Other }, NewOps);
OpChains.push_back(Ops[i].getValue(1));
}
@@ -4386,11 +4425,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
ReplaceValueWith(SDValue(N, 1), NewChain);
} else {
for (unsigned i = 0; i < NumElts; ++i)
- Ops[i] = DAG.getNode(
- Opcode, dl, EltVT,
- DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ Ops[i] = DAG.getNode(Opcode, dl, EltVT,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT,
+ InOp, DAG.getVectorIdxConstant(i, dl)));
}
return DAG.getBuildVector(VT, dl, Ops);
@@ -4411,9 +4448,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
if (TLI.isTypeLegal(NewVT)) {
SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
- return DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
+ DAG.getVectorIdxConstant(0, dl));
}
}
@@ -4430,7 +4466,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
if (TLI.isTypeLegal(NewVT)) {
SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, BitOp,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ DAG.getVectorIdxConstant(0, dl));
}
}
}
@@ -4470,10 +4506,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
TargetLowering::TypeWidenVector &&
"Unexpected type action");
InOp = GetWidenedVector(InOp);
- for (unsigned j=0; j < NumInElts; ++j)
- Ops[Idx++] = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ for (unsigned j = 0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getVectorIdxConstant(j, dl));
}
return DAG.getBuildVector(VT, dl, Ops);
}
@@ -4630,9 +4665,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
SVT.getVectorElementType(),
VT.getVectorNumElements());
- SDValue CC = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC,
+ DAG.getVectorIdxConstant(0, dl));
EVT OpVT = N->getOperand(0).getValueType();
ISD::NodeType ExtendCode =
@@ -4657,12 +4691,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_STRICT_FSETCC(SDNode *N) {
SmallVector<SDValue, 8> Chains(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
- SDValue LHSElem = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- SDValue RHSElem = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getVectorIdxConstant(i, dl));
+ SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getVectorIdxConstant(i, dl));
Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other},
{Chain, LHSElem, RHSElem, CC});
@@ -4716,11 +4748,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
break;
case ISD::VECREDUCE_FMAX:
NeutralElem = DAG.getConstantFP(
- std::numeric_limits<double>::infinity(), dl, ElemVT);
+ -std::numeric_limits<double>::infinity(), dl, ElemVT);
break;
case ISD::VECREDUCE_FMIN:
NeutralElem = DAG.getConstantFP(
- -std::numeric_limits<double>::infinity(), dl, ElemVT);
+ std::numeric_limits<double>::infinity(), dl, ElemVT);
break;
}
@@ -4729,7 +4761,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
unsigned WideElts = WideVT.getVectorNumElements();
for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ DAG.getVectorIdxConstant(Idx, dl));
return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Op, N->getFlags());
}
@@ -4748,9 +4780,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
SDValue Select = DAG.getNode(N->getOpcode(), DL, LeftIn.getValueType(), Cond,
LeftIn, RightIn);
- return DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, DL, VT, Select,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Select,
+ DAG.getVectorIdxConstant(0, DL));
}
//===----------------------------------------------------------------------===//
@@ -4836,7 +4867,6 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
SmallVectorImpl<SDValue> &LdOps,
unsigned Start, unsigned End) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc dl(LdOps[Start]);
EVT LdTy = LdOps[Start].getValueType();
unsigned Width = VecTy.getSizeInBits();
@@ -4856,9 +4886,8 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
LdTy = NewLdTy;
}
- VecOp = DAG.getNode(
- ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
- DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
+ DAG.getVectorIdxConstant(Idx++, dl));
}
return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
}
@@ -4879,19 +4908,19 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
// Load information
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
- unsigned Align = LD->getAlignment();
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth;
- unsigned LdAlign = (!LD->isSimple()) ? 0 : Align; // Allow wider loads.
+ // Allow wider loads.
+ unsigned LdAlign = (!LD->isSimple()) ? 0 : LD->getAlignment();
// Find the vector type that we can load from.
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
int NewVTWidth = NewVT.getSizeInBits();
SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
- Align, MMOFlags, AAInfo);
+ LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction.
@@ -4934,7 +4963,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
NewVTWidth = NewVT.getSizeInBits();
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset),
- MinAlign(Align, Increment), MMOFlags, AAInfo);
+ LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(L.getValue(1));
if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) {
// Later code assumes the vector loads produced will be mergeable, so we
@@ -4952,7 +4981,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
} else {
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset),
- MinAlign(Align, Increment), MMOFlags, AAInfo);
+ LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(L.getValue(1));
}
@@ -5029,7 +5058,6 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
// Load information
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
- unsigned Align = LD->getAlignment();
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
@@ -5043,14 +5071,14 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
unsigned Increment = LdEltVT.getSizeInBits() / 8;
Ops[0] =
DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(),
- LdEltVT, Align, MMOFlags, AAInfo);
+ LdEltVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(Ops[0].getValue(1));
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset);
Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
- Align, MMOFlags, AAInfo);
+ LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(Ops[i].getValue(1));
}
@@ -5069,7 +5097,6 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
// element type or scalar stores.
SDValue Chain = ST->getChain();
SDValue BasePtr = ST->getBasePtr();
- unsigned Align = ST->getAlignment();
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
SDValue ValOp = GetWidenedVector(ST->getValue());
@@ -5093,12 +5120,11 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
if (NewVT.isVector()) {
unsigned NumVTElts = NewVT.getVectorNumElements();
do {
- SDValue EOp = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
+ DAG.getVectorIdxConstant(Idx, dl));
StChain.push_back(DAG.getStore(
Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset),
- MinAlign(Align, Offset), MMOFlags, AAInfo));
+ ST->getOriginalAlign(), MMOFlags, AAInfo));
StWidth -= NewVTWidth;
Offset += Increment;
Idx += NumVTElts;
@@ -5113,13 +5139,11 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
// Readjust index position based on new vector type.
Idx = Idx * ValEltWidth / NewVTWidth;
do {
- SDValue EOp = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
- DAG.getConstant(Idx++, dl,
- TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
+ DAG.getVectorIdxConstant(Idx++, dl));
StChain.push_back(DAG.getStore(
Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset),
- MinAlign(Align, Offset), MMOFlags, AAInfo));
+ ST->getOriginalAlign(), MMOFlags, AAInfo));
StWidth -= NewVTWidth;
Offset += Increment;
BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment);
@@ -5137,7 +5161,6 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
// and then store it. Instead, we extract each element and then store it.
SDValue Chain = ST->getChain();
SDValue BasePtr = ST->getBasePtr();
- unsigned Align = ST->getAlignment();
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
SDValue ValOp = GetWidenedVector(ST->getValue());
@@ -5157,21 +5180,19 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
EVT ValEltVT = ValVT.getVectorElementType();
unsigned Increment = ValEltVT.getSizeInBits() / 8;
unsigned NumElts = StVT.getVectorNumElements();
- SDValue EOp = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
- ST->getPointerInfo(), StEltVT, Align,
- MMOFlags, AAInfo));
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getVectorIdxConstant(0, dl));
+ StChain.push_back(
+ DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), StEltVT,
+ ST->getOriginalAlign(), MMOFlags, AAInfo));
unsigned Offset = Increment;
for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset);
- SDValue EOp = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getVectorIdxConstant(0, dl));
StChain.push_back(DAG.getTruncStore(
Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset),
- StEltVT, MinAlign(Align, Offset), MMOFlags, AAInfo));
+ StEltVT, ST->getOriginalAlign(), MMOFlags, AAInfo));
}
}
@@ -5206,9 +5227,8 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
}
if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
- return DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
+ DAG.getVectorIdxConstant(0, dl));
// Fall back to extract and build.
SmallVector<SDValue, 16> Ops(WidenNumElts);
@@ -5216,9 +5236,8 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
unsigned Idx;
for (Idx = 0; Idx < MinNumElts; ++Idx)
- Ops[Idx] = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getVectorIdxConstant(Idx, dl));
SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
DAG.getUNDEF(EltVT);
diff --git a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 34660e3a48ec5..55fe26eb64cda 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -19,9 +19,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 7ee44c808fcbf..2902c96c7658c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -761,7 +761,7 @@ void ScheduleDAGLinearize::Schedule() {
MachineBasicBlock*
ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
InstrEmitter Emitter(BB, InsertPos);
- DenseMap<SDValue, unsigned> VRBaseMap;
+ DenseMap<SDValue, Register> VRBaseMap;
LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; });
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index ff806bdb822c2..72e68a5045c69 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -279,7 +279,7 @@ private:
SUnit *NewNode = newSUnit(N);
// Update the topological ordering.
if (NewNode->NodeNum >= NumSUnits)
- Topo.MarkDirty();
+ Topo.AddSUnitWithoutPredecessors(NewNode);
return NewNode;
}
@@ -289,7 +289,7 @@ private:
SUnit *NewNode = Clone(N);
// Update the topological ordering.
if (NewNode->NodeNum >= NumSUnits)
- Topo.MarkDirty();
+ Topo.AddSUnitWithoutPredecessors(NewNode);
return NewNode;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 0e4d783e3505d..ce20d506586f0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -31,6 +31,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#define DEBUG_TYPE "pre-RA-sched"
@@ -198,10 +199,10 @@ static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
/// outputs to ensure they are scheduled together and in order. This
/// optimization may benefit some targets by improving cache locality.
void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
- SDNode *Chain = nullptr;
+ SDValue Chain;
unsigned NumOps = Node->getNumOperands();
if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
- Chain = Node->getOperand(NumOps-1).getNode();
+ Chain = Node->getOperand(NumOps-1);
if (!Chain)
return;
@@ -234,6 +235,9 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
unsigned UseCount = 0;
for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
I != E && UseCount < 100; ++I, ++UseCount) {
+ if (I.getUse().getResNo() != Chain.getResNo())
+ continue;
+
SDNode *User = *I;
if (User == Node || !Visited.insert(User).second)
continue;
@@ -471,6 +475,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDNode *OpN = N->getOperand(i).getNode();
+ unsigned DefIdx = N->getOperand(i).getResNo();
if (isPassiveNode(OpN)) continue; // Not scheduled.
SUnit *OpSU = &SUnits[OpN->getNodeId()];
assert(OpSU && "Node has no SUnit!");
@@ -505,7 +510,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
Dep.setLatency(OpLatency);
if (!isChain && !UnitLatencies) {
computeOperandLatency(OpN, N, i, Dep);
- ST.adjustSchedDependency(OpSU, SU, Dep);
+ ST.adjustSchedDependency(OpSU, DefIdx, SU, i, Dep);
}
if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
@@ -731,7 +736,7 @@ void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
static void
ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
- DenseMap<SDValue, unsigned> &VRBaseMap, unsigned Order) {
+ DenseMap<SDValue, Register> &VRBaseMap, unsigned Order) {
if (!N->getHasDebugValue())
return;
@@ -758,9 +763,9 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
// instructions in the right order.
static void
ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
- DenseMap<SDValue, unsigned> &VRBaseMap,
+ DenseMap<SDValue, Register> &VRBaseMap,
SmallVectorImpl<std::pair<unsigned, MachineInstr *>> &Orders,
- SmallSet<unsigned, 8> &Seen, MachineInstr *NewInsn) {
+ SmallSet<Register, 8> &Seen, MachineInstr *NewInsn) {
unsigned Order = N->getIROrder();
if (!Order || Seen.count(Order)) {
// Process any valid SDDbgValues even if node does not have any order
@@ -784,17 +789,17 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
}
void ScheduleDAGSDNodes::
-EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap,
MachineBasicBlock::iterator InsertPos) {
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->isCtrl()) continue; // ignore chain preds
if (I->getSUnit()->CopyDstRC) {
// Copy to physical register.
- DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
+ DenseMap<SUnit*, Register>::iterator VRI = VRBaseMap.find(I->getSUnit());
assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
// Find the destination physical register.
- unsigned Reg = 0;
+ Register Reg;
for (SUnit::const_succ_iterator II = SU->Succs.begin(),
EE = SU->Succs.end(); II != EE; ++II) {
if (II->isCtrl()) continue; // ignore chain preds
@@ -826,17 +831,17 @@ EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
MachineBasicBlock *ScheduleDAGSDNodes::
EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
InstrEmitter Emitter(BB, InsertPos);
- DenseMap<SDValue, unsigned> VRBaseMap;
- DenseMap<SUnit*, unsigned> CopyVRBaseMap;
+ DenseMap<SDValue, Register> VRBaseMap;
+ DenseMap<SUnit*, Register> CopyVRBaseMap;
SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
- SmallSet<unsigned, 8> Seen;
+ SmallSet<Register, 8> Seen;
bool HasDbg = DAG->hasDebugValues();
// Emit a node, and determine where its first instruction is for debuginfo.
// Zero, one, or multiple instructions can be created when emitting a node.
auto EmitNode =
[&](SDNode *Node, bool IsClone, bool IsCloned,
- DenseMap<SDValue, unsigned> &VRBaseMap) -> MachineInstr * {
+ DenseMap<SDValue, Register> &VRBaseMap) -> MachineInstr * {
// Fetch instruction prior to this, or end() if nonexistent.
auto GetPrevInsn = [&](MachineBasicBlock::iterator I) {
if (I == BB->begin())
@@ -863,9 +868,14 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
MI = &*std::next(Before);
}
- if (MI->isCall() && DAG->getTarget().Options.EnableDebugEntryValues)
+ if (MI->isCandidateForCallSiteEntry() &&
+ DAG->getTarget().Options.EmitCallSiteInfo)
MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node));
+ if (DAG->getNoMergeSiteInfo(Node)) {
+ MI->setFlag(MachineInstr::MIFlag::NoMerge);
+ }
+
return MI;
};
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 183ce4b0652d0..8c28ce403c9bd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -184,7 +184,7 @@ class InstrItineraryData;
void BuildSchedUnits();
void AddSchedEdges();
- void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+ void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap,
MachineBasicBlock::iterator InsertPos);
};
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 313e07b5fdd6f..592c09c10fb08 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -38,6 +38,7 @@
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -543,7 +544,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
case ISD::ConstantPool:
case ISD::TargetConstantPool: {
const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
- ID.AddInteger(CP->getAlignment());
+ ID.AddInteger(CP->getAlign().value());
ID.AddInteger(CP->getOffset());
if (CP->isMachineConstantPoolEntry())
CP->getMachineCPVal()->addSelectionDAGCSEId(ID);
@@ -1000,12 +1001,12 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
return Node;
}
-unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
+Align SelectionDAG::getEVTAlign(EVT VT) const {
Type *Ty = VT == MVT::iPTR ?
PointerType::get(Type::getInt8Ty(*getContext()), 0) :
VT.getTypeForEVT(*getContext());
- return getDataLayout().getABITypeAlignment(Ty);
+ return getDataLayout().getABITypeAlign(Ty);
}
// EntryNode could meaningfully have debug info if we can find it...
@@ -1167,15 +1168,21 @@ SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT,
}
SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
- assert(!VT.isVector() &&
- "getZeroExtendInReg should use the vector element type instead of "
- "the vector type!");
- if (Op.getValueType().getScalarType() == VT) return Op;
- unsigned BitWidth = Op.getScalarValueSizeInBits();
- APInt Imm = APInt::getLowBitsSet(BitWidth,
- VT.getSizeInBits());
- return getNode(ISD::AND, DL, Op.getValueType(), Op,
- getConstant(Imm, DL, Op.getValueType()));
+ EVT OpVT = Op.getValueType();
+ assert(VT.isInteger() && OpVT.isInteger() &&
+ "Cannot getZeroExtendInReg FP types");
+ assert(VT.isVector() == OpVT.isVector() &&
+ "getZeroExtendInReg type should be vector iff the operand "
+ "type is vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorElementCount() == OpVT.getVectorElementCount()) &&
+ "Vector element counts must match in getZeroExtendInReg");
+ assert(VT.bitsLE(OpVT) && "Not extending!");
+ if (OpVT == VT)
+ return Op;
+ APInt Imm = APInt::getLowBitsSet(OpVT.getScalarSizeInBits(),
+ VT.getScalarSizeInBits());
+ return getNode(ISD::AND, DL, OpVT, Op, getConstant(Imm, DL, OpVT));
}
SDValue SelectionDAG::getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
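Editorial sketch, not part of the patch: the rewritten getZeroExtendInReg still lowers to a plain AND with a low-bits mask, now for scalar and vector operands alike. A self-contained scalar analogue of that mask-and-AND:

#include <cassert>
#include <cstdint>

// Zero-extend the low FromBits of V "in register": every bit above the low
// FromBits is forced to zero, exactly what the emitted AND does.
static uint32_t zeroExtendInReg(uint32_t V, unsigned FromBits) {
  uint32_t Mask = FromBits >= 32 ? ~0u : ((1u << FromBits) - 1u);
  return V & Mask;
}

int main() {
  assert(zeroExtendInReg(0xFFFFFFABu, 8) == 0xABu);
  assert(zeroExtendInReg(0x00001234u, 16) == 0x1234u);
  return 0;
}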
@@ -1332,10 +1339,16 @@ SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL,
SDValue SelectionDAG::getShiftAmountConstant(uint64_t Val, EVT VT,
const SDLoc &DL, bool LegalTypes) {
+ assert(VT.isInteger() && "Shift amount is not an integer type!");
EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout(), LegalTypes);
return getConstant(Val, DL, ShiftVT);
}
+SDValue SelectionDAG::getVectorIdxConstant(uint64_t Val, const SDLoc &DL,
+ bool isTarget) {
+ return getConstant(Val, DL, TLI->getVectorIdxTy(getDataLayout()), isTarget);
+}
+
SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT,
bool isTarget) {
return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget);
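A note on the getVectorIdxConstant helper introduced above: it wraps getConstant with the target's vector-index type, which is what lets the many call sites in this patch drop their explicit TLI.getVectorIdxTy(...) argument. A hedged usage sketch against the SelectionDAG API as it appears in this diff (extractElt is an illustrative name, not an LLVM function, and this is not a standalone program):

// Before this patch, a call site spelled the index type out by hand:
//   DAG.getConstant(I, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))
// With the helper, building the index constant is a single call:
static SDValue extractElt(SelectionDAG &DAG, const SDLoc &DL, EVT EltVT,
                          SDValue Vec, uint64_t I) {
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
                     DAG.getVectorIdxConstant(I, DL));
}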
@@ -1381,7 +1394,7 @@ SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT,
else if (EltVT == MVT::f64)
return getConstantFP(APFloat(Val), DL, VT, isTarget);
else if (EltVT == MVT::f80 || EltVT == MVT::f128 || EltVT == MVT::ppcf128 ||
- EltVT == MVT::f16) {
+ EltVT == MVT::f16 || EltVT == MVT::bf16) {
bool Ignored;
APFloat APF = APFloat(Val);
APF.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
@@ -1459,19 +1472,18 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
}
SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
- unsigned Alignment, int Offset,
- bool isTarget,
- unsigned TargetFlags) {
+ MaybeAlign Alignment, int Offset,
+ bool isTarget, unsigned TargetFlags) {
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
- if (Alignment == 0)
+ if (!Alignment)
Alignment = shouldOptForSize()
- ? getDataLayout().getABITypeAlignment(C->getType())
- : getDataLayout().getPrefTypeAlignment(C->getType());
+ ? getDataLayout().getABITypeAlign(C->getType())
+ : getDataLayout().getPrefTypeAlign(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
- ID.AddInteger(Alignment);
+ ID.AddInteger(Alignment->value());
ID.AddInteger(Offset);
ID.AddPointer(C);
ID.AddInteger(TargetFlags);
@@ -1479,25 +1491,26 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment,
+ auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, *Alignment,
TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new constant pool: ", this);
+ return V;
}
SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
- unsigned Alignment, int Offset,
- bool isTarget,
- unsigned TargetFlags) {
+ MaybeAlign Alignment, int Offset,
+ bool isTarget, unsigned TargetFlags) {
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
- if (Alignment == 0)
- Alignment = getDataLayout().getPrefTypeAlignment(C->getType());
+ if (!Alignment)
+ Alignment = getDataLayout().getPrefTypeAlign(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
- ID.AddInteger(Alignment);
+ ID.AddInteger(Alignment->value());
ID.AddInteger(Offset);
C->addSelectionDAGCSEId(ID);
ID.AddInteger(TargetFlags);
@@ -1505,7 +1518,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment,
+ auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, *Alignment,
TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
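Side note, not part of the change itself: the MaybeAlign parameter above lets callers omit the alignment and have getConstantPool pick one, falling back to the ABI alignment when optimizing for size and to the preferred alignment otherwise. A tiny standalone model of that fallback, with std::optional standing in for MaybeAlign:

#include <cassert>
#include <cstdint>
#include <optional>

static uint64_t chooseConstantPoolAlign(std::optional<uint64_t> Requested,
                                        bool OptForSize, uint64_t ABIAlign,
                                        uint64_t PrefAlign) {
  // No alignment requested: derive one from the data layout policy.
  if (!Requested)
    Requested = OptForSize ? ABIAlign : PrefAlign;
  return *Requested;
}

int main() {
  assert(chooseConstantPoolAlign(std::nullopt, /*OptForSize=*/true, 4, 16) == 4);
  assert(chooseConstantPoolAlign(std::nullopt, /*OptForSize=*/false, 4, 16) == 16);
  assert(chooseConstantPoolAlign(8, false, 4, 16) == 8);
  return 0;
}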
@@ -1861,9 +1874,6 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
}
SDValue SelectionDAG::getSrcValue(const Value *V) {
- assert((!V || V->getType()->isPointerTy()) &&
- "SrcValue is not a pointer?");
-
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None);
ID.AddPointer(V);
@@ -1921,6 +1931,10 @@ SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr,
return SDValue(N, 0);
}
+SDValue SelectionDAG::getFreeze(SDValue V) {
+ return getNode(ISD::FREEZE, SDLoc(V), V.getValueType(), V);
+}
+
/// getShiftAmountOperand - Return the specified value casted to
/// the target's desired shift amount type.
SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
@@ -1979,28 +1993,54 @@ SDValue SelectionDAG::expandVACopy(SDNode *Node) {
MachinePointerInfo(VD));
}
-SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
- MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
- unsigned ByteSize = VT.getStoreSize();
+Align SelectionDAG::getReducedAlign(EVT VT, bool UseABI) {
+ const DataLayout &DL = getDataLayout();
Type *Ty = VT.getTypeForEVT(*getContext());
- unsigned StackAlign =
- std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign);
+ Align RedAlign = UseABI ? DL.getABITypeAlign(Ty) : DL.getPrefTypeAlign(Ty);
+
+ if (TLI->isTypeLegal(VT) || !VT.isVector())
+ return RedAlign;
+
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ const Align StackAlign = TFI->getStackAlign();
+
+ // See if we can choose a smaller ABI alignment in cases where it's an
+ // illegal vector type that will get broken down.
+ if (RedAlign > StackAlign) {
+ EVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ TLI->getVectorTypeBreakdown(*getContext(), VT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ Ty = IntermediateVT.getTypeForEVT(*getContext());
+ Align RedAlign2 = UseABI ? DL.getABITypeAlign(Ty) : DL.getPrefTypeAlign(Ty);
+ if (RedAlign2 < RedAlign)
+ RedAlign = RedAlign2;
+ }
+
+ return RedAlign;
+}
- int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
+SDValue SelectionDAG::CreateStackTemporary(TypeSize Bytes, Align Alignment) {
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(Bytes, Alignment, false);
return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
}
+SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
+ Type *Ty = VT.getTypeForEVT(*getContext());
+ Align StackAlign =
+ std::max(getDataLayout().getPrefTypeAlign(Ty), Align(minAlign));
+ return CreateStackTemporary(VT.getStoreSize(), StackAlign);
+}
+
SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
- unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize());
+ TypeSize Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize());
Type *Ty1 = VT1.getTypeForEVT(*getContext());
Type *Ty2 = VT2.getTypeForEVT(*getContext());
const DataLayout &DL = getDataLayout();
- unsigned Align =
- std::max(DL.getPrefTypeAlignment(Ty1), DL.getPrefTypeAlignment(Ty2));
-
- MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(Bytes, Align, false);
- return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
+ Align Align = std::max(DL.getPrefTypeAlign(Ty1), DL.getPrefTypeAlign(Ty2));
+ return CreateStackTemporary(Bytes, Align);
}
SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
@@ -2179,21 +2219,16 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
const APInt &DemandedElts) {
switch (V.getOpcode()) {
default:
+ return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts,
+ *this, 0);
break;
case ISD::Constant: {
- auto *CV = cast<ConstantSDNode>(V.getNode());
- assert(CV && "Const value should be ConstSDNode.");
- const APInt &CVal = CV->getAPIntValue();
+ const APInt &CVal = cast<ConstantSDNode>(V)->getAPIntValue();
APInt NewVal = CVal & DemandedBits;
if (NewVal != CVal)
return getConstant(NewVal, SDLoc(V), V.getValueType());
break;
}
- case ISD::OR:
- case ISD::XOR:
- case ISD::SIGN_EXTEND_INREG:
- return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts,
- *this, 0);
case ISD::SRL:
// Only look at single-use SRLs.
if (!V.getNode()->hasOneUse())
@@ -2224,19 +2259,6 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
}
break;
}
- case ISD::ANY_EXTEND: {
- SDValue Src = V.getOperand(0);
- unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
- // Being conservative here - only peek through if we only demand bits in the
- // non-extended source (even though the extended bits are technically
- // undef).
- if (DemandedBits.getActiveBits() > SrcBitWidth)
- break;
- APInt SrcDemandedBits = DemandedBits.trunc(SrcBitWidth);
- if (SDValue DemandedSrc = GetDemandedBits(Src, SrcDemandedBits))
- return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc);
- break;
- }
}
return SDValue();
}
@@ -2253,11 +2275,7 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
/// for bits that V cannot have.
bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask,
unsigned Depth) const {
- EVT VT = V.getValueType();
- APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
- : APInt(1, 1);
- return MaskedValueIsZero(V, Mask, DemandedElts, Depth);
+ return Mask.isSubsetOf(computeKnownBits(V, Depth).Zero);
}
/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero in
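Not part of the patch, just a restatement: the one-line MaskedValueIsZero body above is the usual subset test, i.e. every bit of Mask must be a known-zero bit of V. A standalone equivalent on plain 32-bit masks:

#include <cassert>
#include <cstdint>

// True iff every set bit of Mask is also set in KnownZero, so V & Mask == 0.
static bool maskedValueIsZero(uint32_t KnownZero, uint32_t Mask) {
  return (Mask & ~KnownZero) == 0;
}

int main() {
  const uint32_t KnownZero = 0xFFFFFF00u; // low byte unknown, rest known zero
  assert(maskedValueIsZero(KnownZero, 0x0000F000u));
  assert(!maskedValueIsZero(KnownZero, 0x000000F0u));
  return 0;
}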
@@ -2276,15 +2294,42 @@ bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask,
}
/// isSplatValue - Return true if the vector V has the same value
-/// across all DemandedElts.
+/// across all DemandedElts. For scalable vectors it does not make
+/// sense to specify which elements are demanded or undefined, therefore
+/// they are simply ignored.
bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
APInt &UndefElts) {
- if (!DemandedElts)
- return false; // No demanded elts, better to assume we don't know anything.
-
EVT VT = V.getValueType();
assert(VT.isVector() && "Vector type expected");
+ if (!VT.isScalableVector() && !DemandedElts)
+ return false; // No demanded elts, better to assume we don't know anything.
+
+ // Deal with some common cases here that work for both fixed and scalable
+ // vector types.
+ switch (V.getOpcode()) {
+ case ISD::SPLAT_VECTOR:
+ return true;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::AND: {
+ APInt UndefLHS, UndefRHS;
+ SDValue LHS = V.getOperand(0);
+ SDValue RHS = V.getOperand(1);
+ if (isSplatValue(LHS, DemandedElts, UndefLHS) &&
+ isSplatValue(RHS, DemandedElts, UndefRHS)) {
+ UndefElts = UndefLHS | UndefRHS;
+ return true;
+ }
+ break;
+ }
+ }
+
+ // We don't support other cases than those above for scalable vectors at
+ // the moment.
+ if (VT.isScalableVector())
+ return false;
+
unsigned NumElts = VT.getVectorNumElements();
assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch");
UndefElts = APInt::getNullValue(NumElts);
@@ -2326,30 +2371,14 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
return true;
}
case ISD::EXTRACT_SUBVECTOR: {
+ // Offset the demanded elts by the subvector index.
SDValue Src = V.getOperand(0);
- ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(V.getOperand(1));
+ uint64_t Idx = V.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
- // Offset the demanded elts by the subvector index.
- uint64_t Idx = SubIdx->getZExtValue();
- APInt UndefSrcElts;
- APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
- if (isSplatValue(Src, DemandedSrc, UndefSrcElts)) {
- UndefElts = UndefSrcElts.extractBits(NumElts, Idx);
- return true;
- }
- }
- break;
- }
- case ISD::ADD:
- case ISD::SUB:
- case ISD::AND: {
- APInt UndefLHS, UndefRHS;
- SDValue LHS = V.getOperand(0);
- SDValue RHS = V.getOperand(1);
- if (isSplatValue(LHS, DemandedElts, UndefLHS) &&
- isSplatValue(RHS, DemandedElts, UndefRHS)) {
- UndefElts = UndefLHS | UndefRHS;
+ APInt UndefSrcElts;
+ APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts)) {
+ UndefElts = UndefSrcElts.extractBits(NumElts, Idx);
return true;
}
break;
@@ -2363,10 +2392,13 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) {
EVT VT = V.getValueType();
assert(VT.isVector() && "Vector type expected");
- unsigned NumElts = VT.getVectorNumElements();
APInt UndefElts;
- APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ APInt DemandedElts;
+
+ // For now we don't support this with scalable vectors.
+ if (!VT.isScalableVector())
+ DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
return isSplatValue(V, DemandedElts, UndefElts) &&
(AllowUndefs || !UndefElts);
}
@@ -2379,19 +2411,35 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
switch (Opcode) {
default: {
APInt UndefElts;
- APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ APInt DemandedElts;
+
+ if (!VT.isScalableVector())
+ DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+
if (isSplatValue(V, DemandedElts, UndefElts)) {
- // Handle case where all demanded elements are UNDEF.
- if (DemandedElts.isSubsetOf(UndefElts)) {
+ if (VT.isScalableVector()) {
+ // DemandedElts and UndefElts are ignored for scalable vectors, since
+ // the only supported cases are SPLAT_VECTOR nodes.
SplatIdx = 0;
- return getUNDEF(VT);
+ } else {
+ // Handle case where all demanded elements are UNDEF.
+ if (DemandedElts.isSubsetOf(UndefElts)) {
+ SplatIdx = 0;
+ return getUNDEF(VT);
+ }
+ SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
}
- SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
return V;
}
break;
}
+ case ISD::SPLAT_VECTOR:
+ SplatIdx = 0;
+ return V;
case ISD::VECTOR_SHUFFLE: {
+ if (VT.isScalableVector())
+ return SDValue();
+
// Check if this is a shuffle node doing a splat.
// TODO - remove this and rely purely on SelectionDAG::isSplatValue,
// getTargetVShiftNode currently struggles without the splat source.
@@ -2413,14 +2461,16 @@ SDValue SelectionDAG::getSplatValue(SDValue V) {
if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx))
return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V),
SrcVector.getValueType().getScalarType(), SrcVector,
- getIntPtrConstant(SplatIdx, SDLoc(V)));
+ getVectorIdxConstant(SplatIdx, SDLoc(V)));
return SDValue();
}
-/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
-/// is less than the element bit-width of the shift node, return it.
-static const APInt *getValidShiftAmountConstant(SDValue V,
- const APInt &DemandedElts) {
+const APInt *
+SelectionDAG::getValidShiftAmountConstant(SDValue V,
+ const APInt &DemandedElts) const {
+ assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL ||
+ V.getOpcode() == ISD::SRA) &&
+ "Unknown shift node");
unsigned BitWidth = V.getScalarValueSizeInBits();
if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1), DemandedElts)) {
// Shifting more than the bitwidth is not valid.
@@ -2431,10 +2481,13 @@ static const APInt *getValidShiftAmountConstant(SDValue V,
return nullptr;
}
-/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less
-/// than the element bit-width of the shift node, return the minimum value.
-static const APInt *
-getValidMinimumShiftAmountConstant(SDValue V, const APInt &DemandedElts) {
+const APInt *SelectionDAG::getValidMinimumShiftAmountConstant(
+ SDValue V, const APInt &DemandedElts) const {
+ assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL ||
+ V.getOpcode() == ISD::SRA) &&
+ "Unknown shift node");
+ if (const APInt *ValidAmt = getValidShiftAmountConstant(V, DemandedElts))
+ return ValidAmt;
unsigned BitWidth = V.getScalarValueSizeInBits();
auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1));
if (!BV)
@@ -2457,10 +2510,13 @@ getValidMinimumShiftAmountConstant(SDValue V, const APInt &DemandedElts) {
return MinShAmt;
}
-/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less
-/// than the element bit-width of the shift node, return the maximum value.
-static const APInt *
-getValidMaximumShiftAmountConstant(SDValue V, const APInt &DemandedElts) {
+const APInt *SelectionDAG::getValidMaximumShiftAmountConstant(
+ SDValue V, const APInt &DemandedElts) const {
+ assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL ||
+ V.getOpcode() == ISD::SRA) &&
+ "Unknown shift node");
+ if (const APInt *ValidAmt = getValidShiftAmountConstant(V, DemandedElts))
+ return ValidAmt;
unsigned BitWidth = V.getScalarValueSizeInBits();
auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1));
if (!BV)
@@ -2488,6 +2544,14 @@ getValidMaximumShiftAmountConstant(SDValue V, const APInt &DemandedElts) {
/// every vector element.
KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
+
+ // TODO: Until we have a plan for how to represent demanded elements for
+ // scalable vectors, we can just bail out for now.
+ if (Op.getValueType().isScalableVector()) {
+ unsigned BitWidth = Op.getScalarValueSizeInBits();
+ return KnownBits(BitWidth);
+ }
+
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
@@ -2503,6 +2567,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
KnownBits Known(BitWidth); // Don't know anything.
+ // TODO: Until we have a plan for how to represent demanded elements for
+ // scalable vectors, we can just bail out for now.
+ if (Op.getValueType().isScalableVector())
+ return Known;
+
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
// We know all of the bits for a constant!
Known.One = C->getAPIntValue();
@@ -2622,52 +2691,40 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
case ISD::INSERT_SUBVECTOR: {
- // If we know the element index, demand any elements from the subvector and
- // the remainder from the src its inserted into, otherwise demand them all.
+ // Demand any elements from the subvector and the remainder from the src it's
+ // inserted into.
SDValue Src = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
- ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ uint64_t Idx = Op.getConstantOperandVal(2);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
- if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) {
- Known.One.setAllBits();
- Known.Zero.setAllBits();
- uint64_t Idx = SubIdx->getZExtValue();
- APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
- if (!!DemandedSubElts) {
- Known = computeKnownBits(Sub, DemandedSubElts, Depth + 1);
- if (Known.isUnknown())
- break; // early-out.
- }
- APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts);
- APInt DemandedSrcElts = DemandedElts & ~SubMask;
- if (!!DemandedSrcElts) {
- Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
- }
- } else {
- Known = computeKnownBits(Sub, Depth + 1);
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ APInt DemandedSrcElts = DemandedElts;
+ DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+
+ Known.One.setAllBits();
+ Known.Zero.setAllBits();
+ if (!!DemandedSubElts) {
+ Known = computeKnownBits(Sub, DemandedSubElts, Depth + 1);
if (Known.isUnknown())
break; // early-out.
- Known2 = computeKnownBits(Src, Depth + 1);
+ }
+ if (!!DemandedSrcElts) {
+ Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
break;
}
case ISD::EXTRACT_SUBVECTOR: {
- // If we know the element index, just demand that subvector elements,
- // otherwise demand them all.
+ // Offset the demanded elts by the subvector index.
SDValue Src = Op.getOperand(0);
- ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ // Bail until we can represent demanded elements for scalable vectors.
+ if (Src.getValueType().isScalableVector())
+ break;
+ uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts);
- if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
- // Offset the demanded elts by the subvector index.
- uint64_t Idx = SubIdx->getZExtValue();
- DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
- }
- Known = computeKnownBits(Src, DemandedSrc, Depth + 1);
+ APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ Known = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
break;
}
case ISD::SCALAR_TO_VECTOR: {
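Illustration only, not from the patch: the EXTRACT_SUBVECTOR case above re-bases the demanded-elements mask onto the source by widening it to the source element count and shifting it up by the constant subvector index. The same arithmetic on a plain 64-bit lane mask:

#include <cassert>
#include <cstdint>

// Demanded lanes of the extracted subvector, expressed as lanes of the
// wider source vector whose extraction starts Idx lanes in.
static uint64_t demandedSrcElts(uint64_t DemandedElts, unsigned Idx) {
  return DemandedElts << Idx;
}

int main() {
  // Extracting 4 elements at index 4 from an 8-element source: demanding
  // result lanes {0,2} demands source lanes {4,6}.
  assert(demandedSrcElts(0b0101, 4) == 0b01010000);
  return 0;
}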
@@ -2753,35 +2810,23 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
case ISD::AND:
- // If either the LHS or the RHS are Zero, the result is zero.
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- // Output known-1 bits are only known if set in both the LHS & RHS.
- Known.One &= Known2.One;
- // Output known-0 are known to be clear if zero in either the LHS | RHS.
- Known.Zero |= Known2.Zero;
+ Known &= Known2;
break;
case ISD::OR:
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- // Output known-0 bits are only known if clear in both the LHS & RHS.
- Known.Zero &= Known2.Zero;
- // Output known-1 are known to be set if set in either the LHS | RHS.
- Known.One |= Known2.One;
+ Known |= Known2;
break;
- case ISD::XOR: {
+ case ISD::XOR:
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- // Output known-0 bits are known if clear or set in both the LHS & RHS.
- APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
- // Output known-1 are known to be set if set in only one of the LHS, RHS.
- Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
- Known.Zero = KnownZeroOut;
+ Known ^= Known2;
break;
- }
case ISD::MUL: {
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
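For reference, not part of the patch: the KnownBits &=, |= and ^= operators used above carry exactly the bit rules spelled out in the comments they replace. A standalone restatement on 32-bit masks:

#include <cassert>
#include <cstdint>

struct Known { uint32_t Zero, One; };

// AND: a result bit is one only if one in both inputs; zero if zero in either.
static Known knownAnd(Known A, Known B) { return {A.Zero | B.Zero, A.One & B.One}; }
// OR: a result bit is zero only if zero in both inputs; one if one in either.
static Known knownOr(Known A, Known B) { return {A.Zero & B.Zero, A.One | B.One}; }
// XOR: a bit is known only if known in both inputs; its value is their xor.
static Known knownXor(Known A, Known B) {
  return {(A.Zero & B.Zero) | (A.One & B.One),
          (A.Zero & B.One) | (A.One & B.Zero)};
}

int main() {
  Known A{0xFFFF0000u, 0x0000FF00u};
  Known B{0xFF000000u, 0x00FF0000u};
  assert(knownAnd(A, B).Zero == 0xFFFF0000u);
  assert(knownOr(A, B).One == 0x00FFFF00u);
  assert((knownXor(A, B).Zero & knownXor(A, B).One) == 0u); // never both
  return 0;
}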
@@ -3075,12 +3120,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
EVT InVT = Op.getOperand(0).getValueType();
APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
- Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
+ Known = Known.zext(BitWidth);
break;
}
case ISD::ZERO_EXTEND: {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
+ Known = Known.zext(BitWidth);
break;
}
case ISD::SIGN_EXTEND_VECTOR_INREG: {
@@ -3099,9 +3144,16 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = Known.sext(BitWidth);
break;
}
+ case ISD::ANY_EXTEND_VECTOR_INREG: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
+ Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
+ Known = Known.anyext(BitWidth);
+ break;
+ }
case ISD::ANY_EXTEND: {
- Known = computeKnownBits(Op.getOperand(0), Depth+1);
- Known = Known.zext(BitWidth, false /* ExtendedBitsAreKnownZero */);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known = Known.anyext(BitWidth);
break;
}
case ISD::TRUNCATE: {
@@ -3117,6 +3169,15 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.One &= (~Known.Zero);
break;
}
+ case ISD::AssertAlign: {
+ unsigned LogOfAlign = Log2(cast<AssertAlignSDNode>(Op)->getAlign());
+ assert(LogOfAlign != 0);
+ // If a node is guaranteed to be aligned, record the corresponding low bits
+ // as known zero and clear them from the known-one set.
+ Known.Zero.setLowBits(LogOfAlign);
+ Known.One.clearLowBits(LogOfAlign);
+ break;
+ }
case ISD::FGETSIGN:
// All bits are zero except the low bit.
Known.Zero.setBitsFrom(1);
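Side illustration, not from the patch: the AssertAlign case above records that a value aligned to 2^K has its low K bits known zero. A standalone equivalent of that mask:

#include <cassert>
#include <cstdint>

// Mask of low bits that an alignment of 2^AlignLog2 forces to zero.
static uint64_t knownZeroFromAlign(unsigned AlignLog2) {
  return AlignLog2 >= 64 ? ~0ull : ((1ull << AlignLog2) - 1);
}

int main() {
  assert(knownZeroFromAlign(4) == 0xF);              // Align(16) -> low 4 bits zero
  assert((0x1230u & knownZeroFromAlign(4)) == 0);    // 0x1230 is 16-byte aligned
  return 0;
}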
@@ -3134,6 +3195,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
LLVM_FALLTHROUGH;
case ISD::SUB:
case ISD::SUBC: {
+ assert(Op.getResNo() == 0 &&
+ "We only compute knownbits for the difference here.");
+
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false,
@@ -3245,57 +3309,51 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
EVT VecVT = InVec.getValueType();
const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
const unsigned NumSrcElts = VecVT.getVectorNumElements();
+
// If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
// anything about the extended bits.
if (BitWidth > EltBitWidth)
Known = Known.trunc(EltBitWidth);
- ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
- if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) {
- // If we know the element index, just demand that vector element.
- unsigned Idx = ConstEltNo->getZExtValue();
- APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
- Known = computeKnownBits(InVec, DemandedElt, Depth + 1);
- } else {
- // Unknown element index, so ignore DemandedElts and demand them all.
- Known = computeKnownBits(InVec, Depth + 1);
- }
+
+ // If we know the element index, just demand that vector element, else for
+ // an unknown element index, ignore DemandedElts and demand them all.
+ APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
+ DemandedSrcElts =
+ APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());
+
+ Known = computeKnownBits(InVec, DemandedSrcElts, Depth + 1);
if (BitWidth > EltBitWidth)
- Known = Known.zext(BitWidth, false /* => any extend */);
+ Known = Known.anyext(BitWidth);
break;
}
case ISD::INSERT_VECTOR_ELT: {
+ // If we know the element index, split the demand between the
+ // source vector and the inserted element, otherwise assume we need
+ // the original demanded vector elements and the value.
SDValue InVec = Op.getOperand(0);
SDValue InVal = Op.getOperand(1);
SDValue EltNo = Op.getOperand(2);
-
- ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ bool DemandedVal = true;
+ APInt DemandedVecElts = DemandedElts;
+ auto *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
- // If we know the element index, split the demand between the
- // source vector and the inserted element.
- Known.Zero = Known.One = APInt::getAllOnesValue(BitWidth);
unsigned EltIdx = CEltNo->getZExtValue();
-
- // If we demand the inserted element then add its common known bits.
- if (DemandedElts[EltIdx]) {
- Known2 = computeKnownBits(InVal, Depth + 1);
- Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
- Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
- }
-
- // If we demand the source vector then add its common known bits, ensuring
- // that we don't demand the inserted element.
- APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx));
- if (!!VectorElts) {
- Known2 = computeKnownBits(InVec, VectorElts, Depth + 1);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
- }
- } else {
- // Unknown element index, so ignore DemandedElts and demand them all.
- Known = computeKnownBits(InVec, Depth + 1);
+ DemandedVal = !!DemandedElts[EltIdx];
+ DemandedVecElts.clearBit(EltIdx);
+ }
+ Known.One.setAllBits();
+ Known.Zero.setAllBits();
+ if (DemandedVal) {
Known2 = computeKnownBits(InVal, Depth + 1);
- Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
- Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
+ Known.One &= Known2.One.zextOrTrunc(BitWidth);
+ Known.Zero &= Known2.Zero.zextOrTrunc(BitWidth);
+ }
+ if (!!DemandedVecElts) {
+ Known2 = computeKnownBits(InVec, DemandedVecElts, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
}
break;
}
@@ -3399,7 +3457,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
- TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth);
+ TLI->computeKnownBitsForFrameIndex(cast<FrameIndexSDNode>(Op)->getIndex(),
+ Known, getMachineFunction());
break;
default:
@@ -3492,6 +3551,11 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
+
+ // TODO: Assume we don't know anything for now.
+ if (VT.isScalableVector())
+ return 1;
+
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
@@ -3515,7 +3579,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
if (Depth >= MaxRecursionDepth)
return 1; // Limit search depth.
- if (!DemandedElts)
+ if (!DemandedElts || VT.isScalableVector())
return 1; // No demanded elts, better to assume we don't know anything.
unsigned Opcode = Op.getOpcode();
@@ -3535,7 +3599,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
continue;
SDValue SrcOp = Op.getOperand(i);
- Tmp2 = ComputeNumSignBits(Op.getOperand(i), Depth + 1);
+ Tmp2 = ComputeNumSignBits(SrcOp, Depth + 1);
// BUILD_VECTOR can implicitly truncate sources, we must handle this.
if (SrcOp.getValueSizeInBits() != VTBits) {
@@ -3646,23 +3710,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::SRA:
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
// SRA X, C -> adds C sign bits.
- if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts))
- Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits);
- else if (const APInt *ShAmt =
- getValidMinimumShiftAmountConstant(Op, DemandedElts))
+ if (const APInt *ShAmt =
+ getValidMinimumShiftAmountConstant(Op, DemandedElts))
Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits);
return Tmp;
case ISD::SHL:
- if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
+ if (const APInt *ShAmt =
+ getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
// shl destroys sign bits, ensure it doesn't shift out all sign bits.
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
if (ShAmt->ult(Tmp))
return Tmp - ShAmt->getZExtValue();
- } else if (const APInt *ShAmt =
- getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
- Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
- if (ShAmt->ult(Tmp))
- return Tmp - ShAmt->getZExtValue();
}
break;
case ISD::AND:
@@ -3712,18 +3770,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
}
// Fallback - just get the minimum number of sign bits of the operands.
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
if (Tmp == 1)
return 1; // Early out.
- Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
return std::min(Tmp, Tmp2);
}
case ISD::UMIN:
case ISD::UMAX:
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
if (Tmp == 1)
return 1; // Early out.
- Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
return std::min(Tmp, Tmp2);
case ISD::SADDO:
case ISD::UADDO:
@@ -3753,7 +3811,14 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
}
case ISD::ROTL:
case ISD::ROTR:
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
+ if (Tmp == VTBits)
+ return VTBits;
+
+ if (ConstantSDNode *C =
+ isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {
unsigned RotAmt = C->getAPIntValue().urem(VTBits);
// Handle rotate right by N like a rotate left by 32-N.
@@ -3762,7 +3827,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// If we aren't rotating out all of the known-in sign bits, return the
// number that are left. This handles rotl(sext(x), 1) for example.
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp > (RotAmt + 1)) return (Tmp - RotAmt);
}
break;
@@ -3770,13 +3834,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::ADDC:
// Add can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
- if (Tmp == 1) return 1; // Early out.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (Tmp == 1) return 1; // Early out.
// Special case decrementing a value (ADD X, -1):
- if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ if (ConstantSDNode *CRHS =
+ isConstOrConstSplat(Op.getOperand(1), DemandedElts))
if (CRHS->isAllOnesValue()) {
- KnownBits Known = computeKnownBits(Op.getOperand(0), Depth+1);
+ KnownBits Known =
+ computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
@@ -3789,18 +3855,19 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return Tmp;
}
- Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
- if (Tmp2 == 1) return 1;
- return std::min(Tmp, Tmp2)-1;
-
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ if (Tmp2 == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2) - 1;
case ISD::SUB:
- Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
- if (Tmp2 == 1) return 1;
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ if (Tmp2 == 1) return 1; // Early out.
// Handle NEG.
- if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0)))
+ if (ConstantSDNode *CLHS =
+ isConstOrConstSplat(Op.getOperand(0), DemandedElts))
if (CLHS->isNullValue()) {
- KnownBits Known = computeKnownBits(Op.getOperand(1), Depth+1);
+ KnownBits Known =
+ computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
if ((Known.Zero | 1).isAllOnesValue())
@@ -3816,9 +3883,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// Sub can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
- if (Tmp == 1) return 1; // Early out.
- return std::min(Tmp, Tmp2)-1;
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (Tmp == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2) - 1;
case ISD::MUL: {
// The output of the Mul can be at most twice the valid bits in the inputs.
unsigned SignBitsOp0 = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
@@ -3853,39 +3920,32 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
}
case ISD::INSERT_VECTOR_ELT: {
+ // If we know the element index, split the demand between the
+ // source vector and the inserted element, otherwise assume we need
+ // the original demanded vector elements and the value.
SDValue InVec = Op.getOperand(0);
SDValue InVal = Op.getOperand(1);
SDValue EltNo = Op.getOperand(2);
-
- ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ bool DemandedVal = true;
+ APInt DemandedVecElts = DemandedElts;
+ auto *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
- // If we know the element index, split the demand between the
- // source vector and the inserted element.
unsigned EltIdx = CEltNo->getZExtValue();
-
- // If we demand the inserted element then get its sign bits.
- Tmp = std::numeric_limits<unsigned>::max();
- if (DemandedElts[EltIdx]) {
- // TODO - handle implicit truncation of inserted elements.
- if (InVal.getScalarValueSizeInBits() != VTBits)
- break;
- Tmp = ComputeNumSignBits(InVal, Depth + 1);
- }
-
- // If we demand the source vector then get its sign bits, and determine
- // the minimum.
- APInt VectorElts = DemandedElts;
- VectorElts.clearBit(EltIdx);
- if (!!VectorElts) {
- Tmp2 = ComputeNumSignBits(InVec, VectorElts, Depth + 1);
- Tmp = std::min(Tmp, Tmp2);
- }
- } else {
- // Unknown element index, so ignore DemandedElts and demand them all.
- Tmp = ComputeNumSignBits(InVec, Depth + 1);
+ DemandedVal = !!DemandedElts[EltIdx];
+ DemandedVecElts.clearBit(EltIdx);
+ }
+ Tmp = std::numeric_limits<unsigned>::max();
+ if (DemandedVal) {
+ // TODO - handle implicit truncation of inserted elements.
+ if (InVal.getScalarValueSizeInBits() != VTBits)
+ break;
Tmp2 = ComputeNumSignBits(InVal, Depth + 1);
Tmp = std::min(Tmp, Tmp2);
}
+ if (!!DemandedVecElts) {
+ Tmp2 = ComputeNumSignBits(InVec, DemandedVecElts, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
return Tmp;
}
@@ -3906,7 +3966,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// If we know the element index, just demand that vector element, else for
// an unknown element index, ignore DemandedElts and demand them all.
APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
- ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts =
APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());
@@ -3914,18 +3974,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1);
}
case ISD::EXTRACT_SUBVECTOR: {
- // If we know the element index, just demand that subvector elements,
- // otherwise demand them all.
+ // Offset the demanded elts by the subvector index.
SDValue Src = Op.getOperand(0);
- ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ // Bail until we can represent demanded elements for scalable vectors.
+ if (Src.getValueType().isScalableVector())
+ break;
+ uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts);
- if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
- // Offset the demanded elts by the subvector index.
- uint64_t Idx = SubIdx->getZExtValue();
- DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
- }
- return ComputeNumSignBits(Src, DemandedSrc, Depth + 1);
+ APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ return ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1);
}
case ISD::CONCAT_VECTORS: {
// Determine the minimum number of sign bits across all demanded
@@ -3946,35 +4003,26 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return Tmp;
}
case ISD::INSERT_SUBVECTOR: {
- // If we know the element index, demand any elements from the subvector and
- // the remainder from the src its inserted into, otherwise demand them all.
+ // Demand any elements from the subvector and the remainder from the src it's
+ // inserted into.
SDValue Src = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
- auto *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ uint64_t Idx = Op.getConstantOperandVal(2);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
- if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) {
- Tmp = std::numeric_limits<unsigned>::max();
- uint64_t Idx = SubIdx->getZExtValue();
- APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
- if (!!DemandedSubElts) {
- Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1);
- if (Tmp == 1) return 1; // early-out
- }
- APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts);
- APInt DemandedSrcElts = DemandedElts & ~SubMask;
- if (!!DemandedSrcElts) {
- Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1);
- Tmp = std::min(Tmp, Tmp2);
- }
- assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
- return Tmp;
- }
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ APInt DemandedSrcElts = DemandedElts;
+ DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
- // Not able to determine the index so just assume worst case.
- Tmp = ComputeNumSignBits(Sub, Depth + 1);
- if (Tmp == 1) return 1; // early-out
- Tmp2 = ComputeNumSignBits(Src, Depth + 1);
- Tmp = std::min(Tmp, Tmp2);
+ Tmp = std::numeric_limits<unsigned>::max();
+ if (!!DemandedSubElts) {
+ Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1);
+ if (Tmp == 1)
+ return 1; // early-out
+ }
+ if (!!DemandedSrcElts) {
+ Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
return Tmp;
}
@@ -4052,13 +4100,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return FirstAnswer;
}
- // Okay, we know that the sign bit in Mask is set. Use CLZ to determine
+ // Okay, we know that the sign bit in Mask is set. Use CLO to determine
// the number of identical bits in the top of the input value.
- Mask = ~Mask;
Mask <<= Mask.getBitWidth()-VTBits;
- // Return # leading zeros. We use 'min' here in case Val was zero before
- // shifting. We don't want to return '64' as for an i32 "0".
- return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
+ return std::max(FirstAnswer, Mask.countLeadingOnes());
}
bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
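Aside, not part of the change: after the simplification above, the number of top bits known to match the sign bit is read off directly as the count of leading ones in the known-bits mask (Known.Zero for a non-negative value, Known.One for a negative one) once that mask is shifted so its top aligns with the value's MSB, rather than inverting the mask and counting leading zeros. A standalone arithmetic check:

#include <cassert>
#include <cstdint>

static unsigned countLeadingOnes32(uint32_t V) {
  unsigned N = 0;
  for (uint32_t Bit = 0x80000000u; Bit != 0 && (V & Bit) != 0; Bit >>= 1)
    ++N;
  return N;
}

int main() {
  // A mask whose top 20 bits are set: 20 top bits are known identical, so
  // the value is known to have at least 20 sign bits.
  assert(countLeadingOnes32(0xFFFFF123u) == 20);
  return 0;
}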
@@ -4109,6 +4154,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FFLOOR:
case ISD::FCEIL:
case ISD::FROUND:
+ case ISD::FROUNDEVEN:
case ISD::FRINT:
case ISD::FNEARBYINT: {
if (SNaN)
@@ -4249,6 +4295,8 @@ static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT,
SelectionDAG &DAG) {
int NumOps = Ops.size();
assert(NumOps != 0 && "Can't build an empty vector!");
+ assert(!VT.isScalableVector() &&
+ "BUILD_VECTOR cannot be used with scalable types");
assert(VT.getVectorNumElements() == (unsigned)NumOps &&
"Incorrect element count in BUILD_VECTOR!");
@@ -4287,8 +4335,8 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
return Ops[0].getValueType() == Op.getValueType();
}) &&
"Concatenation of vectors with inconsistent value types!");
- assert((Ops.size() * Ops[0].getValueType().getVectorNumElements()) ==
- VT.getVectorNumElements() &&
+ assert((Ops[0].getValueType().getVectorElementCount() * Ops.size()) ==
+ VT.getVectorElementCount() &&
"Incorrect element count in vector concatenation!");
if (Ops.size() == 1)
@@ -4305,11 +4353,10 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
bool IsIdentity = true;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
SDValue Op = Ops[i];
- unsigned IdentityIndex = i * Op.getValueType().getVectorNumElements();
+ unsigned IdentityIndex = i * Op.getValueType().getVectorMinNumElements();
if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
Op.getOperand(0).getValueType() != VT ||
(IdentitySrc && Op.getOperand(0) != IdentitySrc) ||
- !isa<ConstantSDNode>(Op.getOperand(1)) ||
Op.getConstantOperandVal(1) != IdentityIndex) {
IsIdentity = false;
break;
@@ -4323,6 +4370,11 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
return IdentitySrc;
}
+ // The code below this point is only designed to work for fixed width
+ // vectors, so we bail out for now.
+ if (VT.isScalableVector())
+ return SDValue();
+
// A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be
// simplified to one big BUILD_VECTOR.
// FIXME: Add support for SCALAR_TO_VECTOR as well.
@@ -4508,7 +4560,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// FIXME need to be more flexible about rounding mode.
(void)V.convert(APFloat::IEEEhalf(),
APFloat::rmNearestTiesToEven, &Ignored);
- return getConstant(V.bitcastToAPInt(), DL, VT);
+ return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
}
}
}
@@ -4553,6 +4605,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
unsigned OpOpcode = Operand.getNode()->getOpcode();
switch (Opcode) {
+ case ISD::FREEZE:
+ assert(VT == Operand.getValueType() && "Unexpected VT!");
+ break;
case ISD::TokenFactor:
case ISD::MERGE_VALUES:
case ISD::CONCAT_VECTORS:
@@ -4597,8 +4652,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"type is vector!");
if (Operand.getValueType() == VT) return Operand; // noop extension
assert((!VT.isVector() ||
- VT.getVectorNumElements() ==
- Operand.getValueType().getVectorNumElements()) &&
+ VT.getVectorElementCount() ==
+ Operand.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
assert(Operand.getValueType().bitsLT(VT) &&
"Invalid sext node, dst < src!");
@@ -4616,8 +4671,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"type is vector!");
if (Operand.getValueType() == VT) return Operand; // noop extension
assert((!VT.isVector() ||
- VT.getVectorNumElements() ==
- Operand.getValueType().getVectorNumElements()) &&
+ VT.getVectorElementCount() ==
+ Operand.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
assert(Operand.getValueType().bitsLT(VT) &&
"Invalid zext node, dst < src!");
@@ -4635,8 +4690,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"type is vector!");
if (Operand.getValueType() == VT) return Operand; // noop extension
assert((!VT.isVector() ||
- VT.getVectorNumElements() ==
- Operand.getValueType().getVectorNumElements()) &&
+ VT.getVectorElementCount() ==
+ Operand.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
assert(Operand.getValueType().bitsLT(VT) &&
"Invalid anyext node, dst < src!");
@@ -4665,8 +4720,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"type is vector!");
if (Operand.getValueType() == VT) return Operand; // noop truncate
assert((!VT.isVector() ||
- VT.getVectorNumElements() ==
- Operand.getValueType().getVectorNumElements()) &&
+ VT.getVectorElementCount() ==
+ Operand.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
assert(Operand.getValueType().bitsGT(VT) &&
"Invalid truncate node, src < dst!");
@@ -4753,6 +4808,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
return getNode(ISD::FABS, DL, VT, Operand.getOperand(0));
break;
+ case ISD::VSCALE:
+ assert(VT == Operand.getValueType() && "Unexpected VT!");
+ break;
}
SDNode *N;
@@ -4824,17 +4882,6 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
return llvm::None;
}
-SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
- EVT VT, const ConstantSDNode *C1,
- const ConstantSDNode *C2) {
- if (C1->isOpaque() || C2->isOpaque())
- return SDValue();
- if (Optional<APInt> Folded =
- FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue()))
- return getConstant(Folded.getValue(), DL, VT);
- return SDValue();
-}
-
SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
const GlobalAddressSDNode *GA,
const SDNode *N2) {
@@ -4881,20 +4928,37 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
}
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
- EVT VT, SDNode *N1, SDNode *N2) {
+ EVT VT, ArrayRef<SDValue> Ops) {
// If the opcode is a target-specific ISD node, there's nothing we can
// do here and the operand rules may not line up with the below, so
// bail early.
if (Opcode >= ISD::BUILTIN_OP_END)
return SDValue();
- if (isUndef(Opcode, {SDValue(N1, 0), SDValue(N2, 0)}))
+ // For now, the array Ops should only contain two values.
+ // This enforcement will be removed once this function is merged with
+ // FoldConstantVectorArithmetic
+ if (Ops.size() != 2)
+ return SDValue();
+
+ if (isUndef(Opcode, Ops))
return getUNDEF(VT);
+ SDNode *N1 = Ops[0].getNode();
+ SDNode *N2 = Ops[1].getNode();
+
// Handle the case of two scalars.
if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) {
if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) {
- SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, C1, C2);
+ if (C1->isOpaque() || C2->isOpaque())
+ return SDValue();
+
+ Optional<APInt> FoldAttempt =
+ FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());
+ if (!FoldAttempt)
+ return SDValue();
+
+ SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT);
assert((!Folded || !VT.isVector()) &&
"Can't fold vectors ops with scalar operands");
return Folded;
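
The scalar path above now goes through FoldValue's Optional<APInt> result directly rather than a separate ConstantSDNode overload. A minimal sketch of that shape; foldAddLike below is an invented stand-in for FoldValue and only handles an ADD-like fold.

    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/Optional.h"

    // Invented stand-in for FoldValue; the real function switches on opcode.
    static llvm::Optional<llvm::APInt> foldAddLike(const llvm::APInt &C1,
                                                   const llvm::APInt &C2) {
      return C1 + C2;
    }

    int main() {
      llvm::APInt C1(32, 7), C2(32, 35);
      // Mirrors the caller: only materialize a constant if a fold happened.
      if (llvm::Optional<llvm::APInt> Folded = foldAddLike(C1, C2))
        return Folded->getZExtValue() == 42 ? 0 : 1;
      return 1;
    }
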
@@ -4908,8 +4972,14 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2))
return FoldSymbolOffset(Opcode, VT, GA, N1);
- // For vectors, extract each constant element and fold them individually.
- // Either input may be an undef value.
+ // TODO: All the folds below are performed lane-by-lane and assume a fixed
+  // vector width; however, we should be able to do constant folds involving
+ // splat vector nodes too.
+ if (VT.isScalableVector())
+ return SDValue();
+
+ // For fixed width vectors, extract each constant element and fold them
+ // individually. Either input may be an undef value.
auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
if (!BV1 && !N1->isUndef())
return SDValue();
@@ -4985,6 +5055,13 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
if (!VT.isVector())
return SDValue();
+ // TODO: All the folds below are performed lane-by-lane and assume a fixed
+  // vector width; however, we should be able to do constant folds involving
+ // splat vector nodes too.
+ if (VT.isScalableVector())
+ return SDValue();
+
+ // From this point onwards all vectors are assumed to be fixed width.
unsigned NumElts = VT.getVectorNumElements();
auto IsScalarOrSameVectorSize = [&](const SDValue &Op) {
@@ -5107,8 +5184,13 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
}
switch (Opcode) {
- case ISD::FADD:
case ISD::FSUB:
+ // -0.0 - undef --> undef (consistent with "fneg undef")
+ if (N1CFP && N1CFP->getValueAPF().isNegZero() && N2.isUndef())
+ return getUNDEF(VT);
+ LLVM_FALLTHROUGH;
+
+ case ISD::FADD:
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
@@ -5122,6 +5204,34 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
return SDValue();
}
+SDValue SelectionDAG::getAssertAlign(const SDLoc &DL, SDValue Val, Align A) {
+ assert(Val.getValueType().isInteger() && "Invalid AssertAlign!");
+
+ // There's no need to assert on a byte-aligned pointer. All pointers are at
+ // least byte aligned.
+ if (A == Align(1))
+ return Val;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::AssertAlign, getVTList(Val.getValueType()), {Val});
+ ID.AddInteger(A.value());
+
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<AssertAlignSDNode>(DL.getIROrder(), DL.getDebugLoc(),
+ Val.getValueType(), A);
+ createOperands(N, {Val});
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
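
A small standalone note on the Align(1) early-out in getAssertAlign above, using LLVM's Alignment helpers with made-up values: byte alignment is the weakest possible guarantee, so only stronger alignments are worth recording on an AssertAlign node.

    #include "llvm/Support/Alignment.h"

    int main() {
      llvm::Align A(1), B(16);
      bool FoldsAway = (A == llvm::Align(1));  // getAssertAlign returns Val as-is
      bool MakesANode = (B != llvm::Align(1)); // a CSE'd AssertAlign is created
      return (FoldsAway && MakesANode) ? 0 : 1;
    }
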
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, SDValue N2, const SDNodeFlags Flags) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
@@ -5186,11 +5296,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (N2C && N2C->isNullValue())
return N1;
break;
+ case ISD::MUL:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
+ APInt MulImm = cast<ConstantSDNode>(N1->getOperand(0))->getAPIntValue();
+ APInt N2CImm = N2C->getAPIntValue();
+ return getVScale(DL, VT, MulImm * N2CImm);
+ }
+ break;
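
The new MUL case (and the matching SHL case a few hunks below) folds a constant applied to ISD::VSCALE into the vscale immediate itself. A standalone sketch of just the immediate arithmetic, with invented values; the getVScale calls in the comments indicate what the DAG would build.

    #include "llvm/ADT/APInt.h"
    #include <cassert>

    int main() {
      llvm::APInt MulImm(64, 4); // existing multiplier of the VSCALE node
      llvm::APInt C(64, 3);      // constant RHS of the MUL
      llvm::APInt Sh(64, 2);     // constant RHS of the SHL
      assert(MulImm * C == llvm::APInt(64, 12));     // -> getVScale(DL, VT, 12)
      assert(MulImm.shl(Sh) == llvm::APInt(64, 16)); // -> getVScale(DL, VT, 16)
      return 0;
    }
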
case ISD::UDIV:
case ISD::UREM:
case ISD::MULHU:
case ISD::MULHS:
- case ISD::MUL:
case ISD::SDIV:
case ISD::SREM:
case ISD::SMIN:
@@ -5213,7 +5332,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
- if (SDValue V = simplifyFPBinop(Opcode, N1, N2))
+ if (SDValue V = simplifyFPBinop(Opcode, N1, N2, Flags))
return V;
break;
case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
@@ -5223,6 +5342,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"Invalid FCOPYSIGN!");
break;
case ISD::SHL:
+ if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
+ APInt MulImm = cast<ConstantSDNode>(N1->getOperand(0))->getAPIntValue();
+ APInt ShiftImm = N2C->getAPIntValue();
+ return getVScale(DL, VT, MulImm << ShiftImm);
+ }
+ LLVM_FALLTHROUGH;
case ISD::SRA:
case ISD::SRL:
if (SDValue V = simplifyShift(N1, N2))
@@ -5240,7 +5365,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// amounts. This catches things like trying to shift an i1024 value by an
// i8, which is easy to fall into in generic code that uses
// TLI.getShiftAmount().
- assert(N2.getValueSizeInBits() >= Log2_32_Ceil(N1.getValueSizeInBits()) &&
+ assert(N2.getValueType().getScalarSizeInBits().getFixedSize() >=
+ Log2_32_Ceil(VT.getScalarSizeInBits().getFixedSize()) &&
"Invalid use of small shift amount with oversized value!");
// Always fold shifts of i1 values so the code generator doesn't need to
@@ -5281,7 +5407,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"SIGN_EXTEND_INREG type should be vector iff the operand "
"type is vector!");
assert((!EVT.isVector() ||
- EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+ EVT.getVectorElementCount() == VT.getVectorElementCount()) &&
"Vector element counts must match in SIGN_EXTEND_INREG");
assert(EVT.bitsLE(VT) && "Not extending!");
if (EVT == VT) return N1; // Not actually extending
@@ -5323,27 +5449,36 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (N1.isUndef() || N2.isUndef())
return getUNDEF(VT);
- // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF
- if (N2C && N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements()))
+ // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF for fixed length
+ // vectors. For scalable vectors we will provide appropriate support for
+ // dealing with arbitrary indices.
+ if (N2C && N1.getValueType().isFixedLengthVector() &&
+ N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements()))
return getUNDEF(VT);
// EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
- // expanding copies of large vectors from registers.
- if (N2C &&
- N1.getOpcode() == ISD::CONCAT_VECTORS &&
- N1.getNumOperands() > 0) {
+ // expanding copies of large vectors from registers. This only works for
+ // fixed length vectors, since we need to know the exact number of
+ // elements.
+ if (N2C && N1.getOperand(0).getValueType().isFixedLengthVector() &&
+ N1.getOpcode() == ISD::CONCAT_VECTORS && N1.getNumOperands() > 0) {
unsigned Factor =
N1.getOperand(0).getValueType().getVectorNumElements();
return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
N1.getOperand(N2C->getZExtValue() / Factor),
- getConstant(N2C->getZExtValue() % Factor, DL,
- N2.getValueType()));
+ getVectorIdxConstant(N2C->getZExtValue() % Factor, DL));
}
- // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is
- // expanding large vector constants.
- if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) {
- SDValue Elt = N1.getOperand(N2C->getZExtValue());
+ // EXTRACT_VECTOR_ELT of BUILD_VECTOR or SPLAT_VECTOR is often formed while
+ // lowering is expanding large vector constants.
+ if (N2C && (N1.getOpcode() == ISD::BUILD_VECTOR ||
+ N1.getOpcode() == ISD::SPLAT_VECTOR)) {
+ assert((N1.getOpcode() != ISD::BUILD_VECTOR ||
+ N1.getValueType().isFixedLengthVector()) &&
+ "BUILD_VECTOR used for scalable vectors");
+ unsigned Index =
+ N1.getOpcode() == ISD::BUILD_VECTOR ? N2C->getZExtValue() : 0;
+ SDValue Elt = N1.getOperand(Index);
if (VT != Elt.getValueType())
// If the vector element type is not legal, the BUILD_VECTOR operands
@@ -5377,8 +5512,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// EXTRACT_VECTOR_ELT of v1iX EXTRACT_SUBVECTOR could be formed
// when vector types are scalarized and v1iX is legal.
- // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx)
+ // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx).
+ // Here we are completely ignoring the extract element index (N2),
+ // which is fine for fixed width vectors, since any index other than 0
+ // is undefined anyway. However, this cannot be ignored for scalable
+ // vectors - in theory we could support this, but we don't want to do this
+ // without a profitability check.
if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N1.getValueType().isFixedLengthVector() &&
N1.getValueType().getVectorNumElements() == 1) {
return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0),
N1.getOperand(1));
@@ -5406,50 +5547,48 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
break;
case ISD::EXTRACT_SUBVECTOR:
- if (VT.isSimple() && N1.getValueType().isSimple()) {
- assert(VT.isVector() && N1.getValueType().isVector() &&
- "Extract subvector VTs must be a vectors!");
- assert(VT.getVectorElementType() ==
- N1.getValueType().getVectorElementType() &&
- "Extract subvector VTs must have the same element type!");
- assert(VT.getSimpleVT() <= N1.getSimpleValueType() &&
- "Extract subvector must be from larger vector to smaller vector!");
-
- if (N2C) {
- assert((VT.getVectorNumElements() + N2C->getZExtValue()
- <= N1.getValueType().getVectorNumElements())
- && "Extract subvector overflow!");
- }
-
- // Trivial extraction.
- if (VT.getSimpleVT() == N1.getSimpleValueType())
- return N1;
-
- // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF.
- if (N1.isUndef())
- return getUNDEF(VT);
+ EVT N1VT = N1.getValueType();
+ assert(VT.isVector() && N1VT.isVector() &&
+ "Extract subvector VTs must be vectors!");
+ assert(VT.getVectorElementType() == N1VT.getVectorElementType() &&
+ "Extract subvector VTs must have the same element type!");
+ assert((VT.isFixedLengthVector() || N1VT.isScalableVector()) &&
+ "Cannot extract a scalable vector from a fixed length vector!");
+ assert((VT.isScalableVector() != N1VT.isScalableVector() ||
+ VT.getVectorMinNumElements() <= N1VT.getVectorMinNumElements()) &&
+ "Extract subvector must be from larger vector to smaller vector!");
+ assert(N2C && "Extract subvector index must be a constant");
+ assert((VT.isScalableVector() != N1VT.isScalableVector() ||
+ (VT.getVectorMinNumElements() + N2C->getZExtValue()) <=
+ N1VT.getVectorMinNumElements()) &&
+ "Extract subvector overflow!");
+
+ // Trivial extraction.
+ if (VT == N1VT)
+ return N1;
- // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of
- // the concat have the same type as the extract.
- if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS &&
- N1.getNumOperands() > 0 &&
- VT == N1.getOperand(0).getValueType()) {
- unsigned Factor = VT.getVectorNumElements();
- return N1.getOperand(N2C->getZExtValue() / Factor);
- }
+ // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF.
+ if (N1.isUndef())
+ return getUNDEF(VT);
- // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created
- // during shuffle legalization.
- if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) &&
- VT == N1.getOperand(1).getValueType())
- return N1.getOperand(1);
+ // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of
+ // the concat have the same type as the extract.
+ if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N1.getNumOperands() > 0 && VT == N1.getOperand(0).getValueType()) {
+ unsigned Factor = VT.getVectorMinNumElements();
+ return N1.getOperand(N2C->getZExtValue() / Factor);
}
+
+ // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created
+ // during shuffle legalization.
+ if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) &&
+ VT == N1.getOperand(1).getValueType())
+ return N1.getOperand(1);
break;
}
// Perform trivial constant folding.
- if (SDValue SV =
- FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode()))
+ if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}))
return SV;
if (SDValue V = foldConstantFPMath(Opcode, DL, VT, N1, N2))
@@ -5571,8 +5710,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"SETCC operands must have the same type!");
assert(VT.isVector() == N1.getValueType().isVector() &&
"SETCC type should be vector iff the operand type is vector!");
- assert((!VT.isVector() ||
- VT.getVectorNumElements() == N1.getValueType().getVectorNumElements()) &&
+ assert((!VT.isVector() || VT.getVectorElementCount() ==
+ N1.getValueType().getVectorElementCount()) &&
"SETCC vector element counts must match!");
// Use FoldSetCC to simplify SETCC's.
if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
@@ -5594,8 +5733,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
llvm_unreachable("should use getVectorShuffle constructor!");
case ISD::INSERT_VECTOR_ELT: {
ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3);
- // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF
- if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
+ // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except
+ // for scalable vectors where we will generate appropriate code to
+ // deal with out-of-bounds cases correctly.
+ if (N3C && N1.getValueType().isFixedLengthVector() &&
+ N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
return getUNDEF(VT);
// Undefined index can be assumed out-of-bounds, so that's UNDEF too.
@@ -5612,33 +5754,34 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// Inserting undef into undef is still undef.
if (N1.isUndef() && N2.isUndef())
return getUNDEF(VT);
- SDValue Index = N3;
- if (VT.isSimple() && N1.getValueType().isSimple()
- && N2.getValueType().isSimple()) {
- assert(VT.isVector() && N1.getValueType().isVector() &&
- N2.getValueType().isVector() &&
- "Insert subvector VTs must be a vectors");
- assert(VT == N1.getValueType() &&
- "Dest and insert subvector source types must match!");
- assert(N2.getSimpleValueType() <= N1.getSimpleValueType() &&
- "Insert subvector must be from smaller vector to larger vector!");
- if (isa<ConstantSDNode>(Index)) {
- assert((N2.getValueType().getVectorNumElements() +
- cast<ConstantSDNode>(Index)->getZExtValue()
- <= VT.getVectorNumElements())
- && "Insert subvector overflow!");
- }
- // Trivial insertion.
- if (VT.getSimpleVT() == N2.getSimpleValueType())
- return N2;
+ EVT N2VT = N2.getValueType();
+ assert(VT == N1.getValueType() &&
+ "Dest and insert subvector source types must match!");
+ assert(VT.isVector() && N2VT.isVector() &&
+ "Insert subvector VTs must be vectors!");
+ assert((VT.isScalableVector() || N2VT.isFixedLengthVector()) &&
+ "Cannot insert a scalable vector into a fixed length vector!");
+ assert((VT.isScalableVector() != N2VT.isScalableVector() ||
+ VT.getVectorMinNumElements() >= N2VT.getVectorMinNumElements()) &&
+ "Insert subvector must be from smaller vector to larger vector!");
+ assert(isa<ConstantSDNode>(N3) &&
+ "Insert subvector index must be constant");
+ assert((VT.isScalableVector() != N2VT.isScalableVector() ||
+ (N2VT.getVectorMinNumElements() +
+ cast<ConstantSDNode>(N3)->getZExtValue()) <=
+ VT.getVectorMinNumElements()) &&
+ "Insert subvector overflow!");
+
+ // Trivial insertion.
+ if (VT == N2VT)
+ return N2;
- // If this is an insert of an extracted vector into an undef vector, we
- // can just use the input to the extract.
- if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT)
- return N2.getOperand(0);
- }
+ // If this is an insert of an extracted vector into an undef vector, we
+ // can just use the input to the extract.
+ if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT)
+ return N2.getOperand(0);
break;
}
case ISD::BITCAST:
@@ -5867,7 +6010,7 @@ static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
- uint64_t Size, unsigned Alignment,
+ uint64_t Size, Align Alignment,
bool isVol, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
@@ -5891,37 +6034,38 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
- unsigned SrcAlign = DAG.InferPtrAlignment(Src);
- if (Alignment > SrcAlign)
+ MaybeAlign SrcAlign = DAG.InferPtrAlign(Src);
+ if (!SrcAlign || Alignment > *SrcAlign)
SrcAlign = Alignment;
+ assert(SrcAlign && "SrcAlign must be set");
ConstantDataArraySlice Slice;
bool CopyFromConstant = isMemSrcFromConstant(Src, Slice);
bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
-
+ const MemOp Op = isZeroConstant
+ ? MemOp::Set(Size, DstAlignCanChange, Alignment,
+ /*IsZeroMemset*/ true, isVol)
+ : MemOp::Copy(Size, DstAlignCanChange, Alignment,
+ *SrcAlign, isVol, CopyFromConstant);
if (!TLI.findOptimalMemOpLowering(
- MemOps, Limit, Size, (DstAlignCanChange ? 0 : Alignment),
- (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false,
- /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant,
- /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(),
+ MemOps, Limit, Op, DstPtrInfo.getAddrSpace(),
SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes()))
return SDValue();
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(C);
- unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
+ Align NewAlign = DL.getABITypeAlign(Ty);
// Don't promote to an alignment that would require dynamic stack
// realignment.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->needsStackRealignment(MF))
- while (NewAlign > Alignment &&
- DL.exceedsNaturalStackAlignment(Align(NewAlign)))
- NewAlign /= 2;
+ while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+ NewAlign = NewAlign / 2;
if (NewAlign > Alignment) {
// Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
+ if (MFI.getObjectAlign(FI->getIndex()) < NewAlign)
MFI.setObjectAlignment(FI->getIndex(), NewAlign);
Alignment = NewAlign;
}
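
The promotion step above now works in Align units: the ABI alignment of the widest store type is halved until it no longer exceeds the natural stack alignment, and only then used to raise the frame object's alignment. A standalone sketch with invented numbers; the 16-byte natural stack alignment stands in for the DataLayout query.

    #include "llvm/Support/Alignment.h"
    #include <cassert>

    int main() {
      llvm::Align NewAlign(64);         // ABI alignment of MemOps[0]'s type
      llvm::Align Alignment(4);         // current destination alignment
      const unsigned NaturalStack = 16; // stand-in for the DataLayout query
      while (NewAlign.value() > Alignment.value() &&
             NewAlign.value() > NaturalStack)
        NewAlign = llvm::Align(NewAlign.value() / 2);
      assert(NewAlign == llvm::Align(16)); // would become the new object align
      return 0;
    }
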
@@ -5968,7 +6112,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (Value.getNode()) {
Store = DAG.getStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags);
+ DstPtrInfo.getWithOffset(DstOff), Alignment.value(), MMOFlags);
OutChains.push_back(Store);
}
}
@@ -5991,12 +6135,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
DAG.getMemBasePlusOffset(Src, SrcOff, dl),
SrcPtrInfo.getWithOffset(SrcOff), VT,
- MinAlign(SrcAlign, SrcOff), SrcMMOFlags);
+ commonAlignment(*SrcAlign, SrcOff).value(),
+ SrcMMOFlags);
OutLoadChains.push_back(Value.getValue(1));
Store = DAG.getTruncStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags);
+ DstPtrInfo.getWithOffset(DstOff), VT, Alignment.value(), MMOFlags);
OutStoreChains.push_back(Store);
}
SrcOff += VTSize;
@@ -6052,7 +6197,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
- uint64_t Size, unsigned Align,
+ uint64_t Size, Align Alignment,
bool isVol, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
@@ -6074,29 +6219,27 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
- unsigned SrcAlign = DAG.InferPtrAlignment(Src);
- if (Align > SrcAlign)
- SrcAlign = Align;
+ MaybeAlign SrcAlign = DAG.InferPtrAlign(Src);
+ if (!SrcAlign || Alignment > *SrcAlign)
+ SrcAlign = Alignment;
+ assert(SrcAlign && "SrcAlign must be set");
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
- // FIXME: `AllowOverlap` should really be `!isVol` but there is a bug in
- // findOptimalMemOpLowering. Meanwhile, setting it to `false` produces the
- // correct code.
- bool AllowOverlap = false;
if (!TLI.findOptimalMemOpLowering(
- MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), SrcAlign,
- /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
- AllowOverlap, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+ MemOps, Limit,
+ MemOp::Copy(Size, DstAlignCanChange, Alignment, *SrcAlign,
+ /*IsVolatile*/ true),
+ DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
MF.getFunction().getAttributes()))
return SDValue();
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(C);
- unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
- if (NewAlign > Align) {
+ Align NewAlign = DL.getABITypeAlign(Ty);
+ if (NewAlign > Alignment) {
// Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
+ if (MFI.getObjectAlign(FI->getIndex()) < NewAlign)
MFI.setObjectAlignment(FI->getIndex(), NewAlign);
- Align = NewAlign;
+ Alignment = NewAlign;
}
}
@@ -6118,9 +6261,9 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (isDereferenceable)
SrcMMOFlags |= MachineMemOperand::MODereferenceable;
- Value =
- DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl),
- SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags);
+ Value = DAG.getLoad(
+ VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl),
+ SrcPtrInfo.getWithOffset(SrcOff), SrcAlign->value(), SrcMMOFlags);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
@@ -6132,9 +6275,9 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Store;
- Store = DAG.getStore(Chain, dl, LoadValues[i],
- DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags);
+ Store = DAG.getStore(
+ Chain, dl, LoadValues[i], DAG.getMemBasePlusOffset(Dst, DstOff, dl),
+ DstPtrInfo.getWithOffset(DstOff), Alignment.value(), MMOFlags);
OutChains.push_back(Store);
DstOff += VTSize;
}
@@ -6151,7 +6294,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
/// \param Dst Pointer to destination memory location.
/// \param Src Value of byte to write into the memory.
/// \param Size Number of bytes to write.
-/// \param Align Alignment of the destination in bytes.
+/// \param Alignment Alignment of the destination in bytes.
/// \param isVol True if destination is volatile.
/// \param DstPtrInfo IR information on the memory pointer.
/// \returns New head in the control flow, if lowering was successful, empty
@@ -6162,7 +6305,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
/// memory size.
static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
- uint64_t Size, unsigned Align, bool isVol,
+ uint64_t Size, Align Alignment, bool isVol,
MachinePointerInfo DstPtrInfo) {
// Turn a memset of undef to nop.
  // FIXME: We need to honor volatile even if Src is undef.
@@ -6183,21 +6326,19 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
bool IsZeroVal =
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
if (!TLI.findOptimalMemOpLowering(
- MemOps, TLI.getMaxStoresPerMemset(OptSize), Size,
- (DstAlignCanChange ? 0 : Align), 0, /*IsMemset=*/true,
- /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
- /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), ~0u,
- MF.getFunction().getAttributes()))
+ MemOps, TLI.getMaxStoresPerMemset(OptSize),
+ MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol),
+ DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes()))
return SDValue();
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
- if (NewAlign > Align) {
+ Align NewAlign = DAG.getDataLayout().getABITypeAlign(Ty);
+ if (NewAlign > Alignment) {
// Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
+ if (MFI.getObjectAlign(FI->getIndex()) < NewAlign)
MFI.setObjectAlignment(FI->getIndex(), NewAlign);
- Align = NewAlign;
+ Alignment = NewAlign;
}
}
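
The memset path above, like the memcpy and memmove paths, now packs its parameters into a MemOp descriptor before asking the target for a store sequence. A construction-only sketch, assuming llvm/CodeGen/TargetLowering.h is on the include path; it does not call findOptimalMemOpLowering, which needs a real TargetLowering.

    #include "llvm/CodeGen/TargetLowering.h"

    int main() {
      llvm::Align DstAlign(8);
      llvm::MemOp Op =
          llvm::MemOp::Set(/*Size=*/32, /*DstAlignCanChange=*/false, DstAlign,
                           /*IsZeroMemset=*/true, /*IsVolatile=*/false);
      (void)Op; // this descriptor is what findOptimalMemOpLowering consumes
      return 0;
    }
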
@@ -6235,7 +6376,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
assert(Value.getValueType() == VT && "Value with wrong type.");
SDValue Store = DAG.getStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), Align,
+ DstPtrInfo.getWithOffset(DstOff), Alignment.value(),
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone);
OutChains.push_back(Store);
DstOff += VT.getSizeInBits() / 8;
@@ -6256,12 +6397,10 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
}
SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
- SDValue Src, SDValue Size, unsigned Align,
+ SDValue Src, SDValue Size, Align Alignment,
bool isVol, bool AlwaysInline, bool isTailCall,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
- assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
-
// Check to see if we should lower the memcpy to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -6270,9 +6409,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
if (ConstantSize->isNullValue())
return Chain;
- SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
- ConstantSize->getZExtValue(),Align,
- isVol, false, DstPtrInfo, SrcPtrInfo);
+ SDValue Result = getMemcpyLoadsAndStores(
+ *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment,
+ isVol, false, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
@@ -6281,7 +6420,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
// code. If the target chooses to do this, this is the next best.
if (TSI) {
SDValue Result = TSI->EmitTargetCodeForMemcpy(
- *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline,
+ *this, dl, Chain, Dst, Src, Size, Alignment, isVol, AlwaysInline,
DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
@@ -6292,8 +6431,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
if (AlwaysInline) {
assert(ConstantSize && "AlwaysInline requires a constant size!");
return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
- ConstantSize->getZExtValue(), Align, isVol,
- true, DstPtrInfo, SrcPtrInfo);
+ ConstantSize->getZExtValue(), Alignment,
+ isVol, true, DstPtrInfo, SrcPtrInfo);
}
checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
@@ -6372,12 +6511,10 @@ SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl,
}
SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
- SDValue Src, SDValue Size, unsigned Align,
+ SDValue Src, SDValue Size, Align Alignment,
bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
- assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
-
// Check to see if we should lower the memmove to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -6386,10 +6523,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
if (ConstantSize->isNullValue())
return Chain;
- SDValue Result =
- getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
- ConstantSize->getZExtValue(), Align, isVol,
- false, DstPtrInfo, SrcPtrInfo);
+ SDValue Result = getMemmoveLoadsAndStores(
+ *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment,
+ isVol, false, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
@@ -6397,8 +6533,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
// Then check to see if we should lower the memmove with target-specific
// code. If the target chooses to do this, this is the next best.
if (TSI) {
- SDValue Result = TSI->EmitTargetCodeForMemmove(
- *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo);
+ SDValue Result =
+ TSI->EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size,
+ Alignment, isVol, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
@@ -6476,11 +6613,9 @@ SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl,
}
SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
- SDValue Src, SDValue Size, unsigned Align,
+ SDValue Src, SDValue Size, Align Alignment,
bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo) {
- assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
-
// Check to see if we should lower the memset to stores first.
// For cases within the target-specified limits, this is the best choice.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -6489,9 +6624,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
if (ConstantSize->isNullValue())
return Chain;
- SDValue Result =
- getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
- Align, isVol, DstPtrInfo);
+ SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Alignment,
+ isVol, DstPtrInfo);
if (Result.getNode())
return Result;
@@ -6501,7 +6636,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
// code. If the target chooses to do this, this is the next best.
if (TSI) {
SDValue Result = TSI->EmitTargetCodeForMemset(
- *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo);
+ *this, dl, Chain, Dst, Src, Size, Alignment, isVol, DstPtrInfo);
if (Result.getNode())
return Result;
}
@@ -6662,11 +6797,8 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) {
SDValue SelectionDAG::getMemIntrinsicNode(
unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
- EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align,
+ EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment,
MachineMemOperand::Flags Flags, uint64_t Size, const AAMDNodes &AAInfo) {
- if (Align == 0) // Ensure that codegen never sees alignment 0
- Align = getEVTAlignment(MemVT);
-
if (!Size && MemVT.isScalableVector())
Size = MemoryLocation::UnknownSize;
else if (!Size)
@@ -6674,7 +6806,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrInfo, Flags, Size, Align, AAInfo);
+ MF.getMachineMemOperand(PtrInfo, Flags, Size, Alignment, AAInfo);
return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO);
}
@@ -6686,8 +6818,6 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
assert((Opcode == ISD::INTRINSIC_VOID ||
Opcode == ISD::INTRINSIC_W_CHAIN ||
Opcode == ISD::PREFETCH ||
- Opcode == ISD::LIFETIME_START ||
- Opcode == ISD::LIFETIME_END ||
((int)Opcode <= std::numeric_limits<int>::max() &&
(int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
"Opcode is not a memory-accessing opcode!");
@@ -6795,13 +6925,11 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue Offset,
MachinePointerInfo PtrInfo, EVT MemVT,
- unsigned Alignment,
+ Align Alignment,
MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo, const MDNode *Ranges) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
- if (Alignment == 0) // Ensure that codegen never sees alignment 0
- Alignment = getEVTAlignment(MemVT);
MMOFlags |= MachineMemOperand::MOLoad;
assert((MMOFlags & MachineMemOperand::MOStore) == 0);
@@ -6810,9 +6938,10 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
+ uint64_t Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize());
MachineFunction &MF = getMachineFunction();
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- PtrInfo, MMOFlags, MemVT.getStoreSize(), Alignment, AAInfo, Ranges);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
+ Alignment, AAInfo, Ranges);
return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
}
@@ -6867,7 +6996,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, MachinePointerInfo PtrInfo,
- unsigned Alignment,
+ MaybeAlign Alignment,
MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo, const MDNode *Ranges) {
SDValue Undef = getUNDEF(Ptr.getValueType());
@@ -6885,7 +7014,7 @@ SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl,
EVT VT, SDValue Chain, SDValue Ptr,
MachinePointerInfo PtrInfo, EVT MemVT,
- unsigned Alignment,
+ MaybeAlign Alignment,
MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo) {
SDValue Undef = getUNDEF(Ptr.getValueType());
@@ -6918,12 +7047,10 @@ SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl,
SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, MachinePointerInfo PtrInfo,
- unsigned Alignment,
+ Align Alignment,
MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo) {
assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
- if (Alignment == 0) // Ensure that codegen never sees alignment 0
- Alignment = getEVTAlignment(Val.getValueType());
MMOFlags |= MachineMemOperand::MOStore;
assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
@@ -6932,8 +7059,10 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
MachineFunction &MF = getMachineFunction();
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- PtrInfo, MMOFlags, Val.getValueType().getStoreSize(), Alignment, AAInfo);
+ uint64_t Size =
+ MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize());
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
return getStore(Chain, dl, Val, Ptr, MMO);
}
@@ -6969,13 +7098,11 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, MachinePointerInfo PtrInfo,
- EVT SVT, unsigned Alignment,
+ EVT SVT, Align Alignment,
MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
- if (Alignment == 0) // Ensure that codegen never sees alignment 0
- Alignment = getEVTAlignment(SVT);
MMOFlags |= MachineMemOperand::MOStore;
assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
@@ -7288,9 +7415,24 @@ SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) {
return SDValue();
}
-// TODO: Use fast-math-flags to enable more simplifications.
-SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y) {
+SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y,
+ SDNodeFlags Flags) {
+  // If this operation has 'nnan' or 'ninf' and at least one disallowed operand
+  // (an undef operand can be chosen to be NaN/Inf), then the result of this
+  // operation is poison. That result can be relaxed to undef.
+ ConstantFPSDNode *XC = isConstOrConstSplatFP(X, /* AllowUndefs */ true);
ConstantFPSDNode *YC = isConstOrConstSplatFP(Y, /* AllowUndefs */ true);
+ bool HasNan = (XC && XC->getValueAPF().isNaN()) ||
+ (YC && YC->getValueAPF().isNaN());
+ bool HasInf = (XC && XC->getValueAPF().isInfinity()) ||
+ (YC && YC->getValueAPF().isInfinity());
+
+ if (Flags.hasNoNaNs() && (HasNan || X.isUndef() || Y.isUndef()))
+ return getUNDEF(X.getValueType());
+
+ if (Flags.hasNoInfs() && (HasInf || X.isUndef() || Y.isUndef()))
+ return getUNDEF(X.getValueType());
+
if (!YC)
return SDValue();
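
The new flag handling in simplifyFPBinop above classifies constant (or splat-constant) operands before deciding whether the whole binop can be relaxed to undef under nnan/ninf. A standalone sketch of just that classification using APFloat, with invented operands.

    #include "llvm/ADT/APFloat.h"
    #include <cassert>

    int main() {
      using llvm::APFloat;
      APFloat X = APFloat::getNaN(APFloat::IEEEsingle());
      APFloat Y = APFloat::getInf(APFloat::IEEEsingle());
      bool HasNan = X.isNaN() || Y.isNaN();
      bool HasInf = X.isInfinity() || Y.isInfinity();
      // With 'nnan' a NaN (or undef) operand makes the result poison, which is
      // then relaxed to undef; 'ninf' works the same way for infinities.
      assert(HasNan && HasInf);
      return 0;
    }
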
@@ -7394,6 +7536,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
createOperands(N, Ops);
}
+ N->setFlags(Flags);
InsertNode(N);
SDValue V(N, 0);
NewSDValueDbgMsg(V, "Creating new node: ", this);
@@ -7406,7 +7549,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
- ArrayRef<SDValue> Ops) {
+ ArrayRef<SDValue> Ops, const SDNodeFlags Flags) {
if (VTList.NumVTs == 1)
return getNode(Opcode, DL, VTList.VTs[0], Ops);
@@ -7481,6 +7624,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList);
createOperands(N, Ops);
}
+
+ N->setFlags(Flags);
InsertNode(N);
SDValue V(N, 0);
NewSDValueDbgMsg(V, "Creating new node: ", this);
@@ -7919,7 +8064,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
switch (OrigOpc) {
default:
llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN: NewOpc = ISD::DAGN; break;
#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN: NewOpc = ISD::SETCC; break;
@@ -9196,9 +9341,8 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
if (!TLI->isExtractSubvectorCheap(SubVT, OpVT, 0))
return SDValue();
BinOp = (ISD::NodeType)CandidateBinOp;
- return getNode(
- ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op,
- getConstant(0, SDLoc(Op), TLI->getVectorIdxTy(getDataLayout())));
+ return getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op,
+ getVectorIdxConstant(0, SDLoc(Op)));
};
// At each stage, we're looking for something that looks like:
@@ -9246,6 +9390,28 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
PrevOp = Op;
}
+ // Handle subvector reductions, which tend to appear after the shuffle
+ // reduction stages.
+ while (Op.getOpcode() == CandidateBinOp) {
+ unsigned NumElts = Op.getValueType().getVectorNumElements();
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (Op0.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ Op1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ Op0.getOperand(0) != Op1.getOperand(0))
+ break;
+ SDValue Src = Op0.getOperand(0);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ if (NumSrcElts != (2 * NumElts))
+ break;
+ if (!(Op0.getConstantOperandAPInt(1) == 0 &&
+ Op1.getConstantOperandAPInt(1) == NumElts) &&
+ !(Op1.getConstantOperandAPInt(1) == 0 &&
+ Op0.getConstantOperandAPInt(1) == NumElts))
+ break;
+ Op = Src;
+ }
+
BinOp = (ISD::NodeType)CandidateBinOp;
return Op;
}
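
The new loop in matchBinOpReduction peels subvector reductions: each step requires both operands to be EXTRACT_SUBVECTORs of the same source, taking its low and high halves in either order. A standalone sketch of just that index check; isLoHiPair is an invented helper, not an LLVM API.

    #include <cassert>

    // Idx0/Idx1 are the constant extract indices; NumElts is the width of the
    // narrower (result) vector, so the source has 2 * NumElts elements.
    static bool isLoHiPair(unsigned Idx0, unsigned Idx1, unsigned NumElts) {
      return (Idx0 == 0 && Idx1 == NumElts) || (Idx1 == 0 && Idx0 == NumElts);
    }

    int main() {
      assert(isLoHiPair(0, 4, 4));  // v8 source split as [0..3] + [4..7]
      assert(isLoHiPair(4, 0, 4));  // the operands may appear in either order
      assert(!isLoHiPair(0, 2, 4)); // overlapping halves: not a reduction step
      return 0;
    }
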
@@ -9276,9 +9442,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
if (OperandVT.isVector()) {
// A vector operand; extract a single element.
EVT OperandEltVT = OperandVT.getVectorElementType();
- Operands[j] =
- getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand,
- getConstant(i, dl, TLI->getVectorIdxTy(getDataLayout())));
+ Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT,
+ Operand, getVectorIdxConstant(i, dl));
} else {
// A scalar operand; just use it as is.
Operands[j] = Operand;
@@ -9395,9 +9560,9 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
return false;
}
-/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
-/// it cannot be inferred.
-unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
+/// InferPtrAlign - Infer alignment of a load / store address. Return None
+/// if it cannot be inferred.
+MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const {
// If this is a GlobalAddress + cst, return the alignment.
const GlobalValue *GV = nullptr;
int64_t GVOffset = 0;
@@ -9406,9 +9571,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
KnownBits Known(PtrWidth);
llvm::computeKnownBits(GV, Known, getDataLayout());
unsigned AlignBits = Known.countMinTrailingZeros();
- unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
- if (Align)
- return MinAlign(Align, GVOffset);
+ if (AlignBits)
+ return commonAlignment(Align(1ull << std::min(31U, AlignBits)), GVOffset);
}
// If this is a direct reference to a stack slot, use information about the
@@ -9426,12 +9590,10 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
if (FrameIdx != INT_MIN) {
const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
- unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
- FrameOffset);
- return FIInfoAlign;
+ return commonAlignment(MFI.getObjectAlign(FrameIdx), FrameOffset);
}
- return 0;
+ return None;
}
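
InferPtrAlign now returns a MaybeAlign and combines the known power-of-two alignment with the constant offset via commonAlignment. A standalone sketch of that combination, assuming the Support headers and made-up numbers.

    #include "llvm/Support/Alignment.h"
    #include <cassert>

    int main() {
      unsigned AlignBits = 4; // low 4 bits of the address known to be zero
      llvm::Align Known(1ull << AlignBits);                  // 16-byte aligned
      llvm::Align Result = llvm::commonAlignment(Known, 8);  // +8 byte offset
      assert(Result == llvm::Align(8));
      return 0;
    }
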
/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
@@ -9447,20 +9609,58 @@ std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const {
return std::make_pair(LoVT, HiVT);
}
+/// GetDependentSplitDestVTs - Compute the VTs needed for the low/hi parts of a
+/// type, dependent on an enveloping VT that has been split into two identical
+/// pieces. Sets the HiIsEmpty flag when hi type has zero storage size.
+std::pair<EVT, EVT>
+SelectionDAG::GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT,
+ bool *HiIsEmpty) const {
+ EVT EltTp = VT.getVectorElementType();
+ bool IsScalable = VT.isScalableVector();
+ // Examples:
+ // custom VL=8 with enveloping VL=8/8 yields 8/0 (hi empty)
+ // custom VL=9 with enveloping VL=8/8 yields 8/1
+ // custom VL=10 with enveloping VL=8/8 yields 8/2
+ // etc.
+ unsigned VTNumElts = VT.getVectorNumElements();
+ unsigned EnvNumElts = EnvVT.getVectorNumElements();
+ EVT LoVT, HiVT;
+ if (VTNumElts > EnvNumElts) {
+ LoVT = EnvVT;
+ HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts,
+ IsScalable);
+ *HiIsEmpty = false;
+ } else {
+    // Flag that hi type has zero storage size, but return split enveloping type
+ // (this would be easier if vector types with zero elements were allowed).
+ LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts, IsScalable);
+ HiVT = EnvVT;
+ *HiIsEmpty = true;
+ }
+ return std::make_pair(LoVT, HiVT);
+}
+
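
The VL examples in the comment above can be checked with plain integer arithmetic. A standalone sketch of the element-count split performed by GetDependentSplitDestVTs, ignoring the scalable flag and the EVT plumbing; splitCounts is an invented helper for illustration.

    #include <cassert>
    #include <utility>

    // Returns {LoElts, HiElts}; HiIsEmpty mirrors the flag set above.
    static std::pair<unsigned, unsigned>
    splitCounts(unsigned VTNumElts, unsigned EnvNumElts, bool &HiIsEmpty) {
      if (VTNumElts > EnvNumElts) {
        HiIsEmpty = false;
        return {EnvNumElts, VTNumElts - EnvNumElts};
      }
      HiIsEmpty = true;               // hi part has zero storage size...
      return {VTNumElts, EnvNumElts}; // ...but the enveloping hi type is kept
    }

    int main() {
      bool HiIsEmpty;
      assert(splitCounts(10, 8, HiIsEmpty) == std::make_pair(8u, 2u) && !HiIsEmpty);
      assert(splitCounts(8, 8, HiIsEmpty) == std::make_pair(8u, 8u) && HiIsEmpty);
      return 0;
    }
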
/// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the
/// low/high part.
std::pair<SDValue, SDValue>
SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT,
const EVT &HiVT) {
- assert(LoVT.getVectorNumElements() + HiVT.getVectorNumElements() <=
- N.getValueType().getVectorNumElements() &&
+ assert(LoVT.isScalableVector() == HiVT.isScalableVector() &&
+ LoVT.isScalableVector() == N.getValueType().isScalableVector() &&
+ "Splitting vector with an invalid mixture of fixed and scalable "
+ "vector types");
+ assert(LoVT.getVectorMinNumElements() + HiVT.getVectorMinNumElements() <=
+ N.getValueType().getVectorMinNumElements() &&
"More vector elements requested than available!");
SDValue Lo, Hi;
- Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N,
- getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout())));
+ Lo =
+ getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, getVectorIdxConstant(0, DL));
+ // For scalable vectors it is safe to use LoVT.getVectorMinNumElements()
+ // (rather than having to use ElementCount), because EXTRACT_SUBVECTOR scales
+ // IDX with the runtime scaling factor of the result vector type. For
+ // fixed-width result vectors, that runtime scaling factor is 1.
Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N,
- getConstant(LoVT.getVectorNumElements(), DL,
- TLI->getVectorIdxTy(getDataLayout())));
+ getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
return std::make_pair(Lo, Hi);
}
@@ -9470,22 +9670,22 @@ SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) {
EVT WideVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(),
NextPowerOf2(VT.getVectorNumElements()));
return getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, getUNDEF(WideVT), N,
- getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout())));
+ getVectorIdxConstant(0, DL));
}
void SelectionDAG::ExtractVectorElements(SDValue Op,
SmallVectorImpl<SDValue> &Args,
- unsigned Start, unsigned Count) {
+ unsigned Start, unsigned Count,
+ EVT EltVT) {
EVT VT = Op.getValueType();
if (Count == 0)
Count = VT.getVectorNumElements();
-
- EVT EltVT = VT.getVectorElementType();
- EVT IdxTy = TLI->getVectorIdxTy(getDataLayout());
+ if (EltVT == EVT())
+ EltVT = VT.getVectorElementType();
SDLoc SL(Op);
for (unsigned i = Start, e = Start + Count; i != e; ++i) {
- Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
- Op, getConstant(i, SL, IdxTy)));
+ Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Op,
+ getVectorIdxConstant(i, SL)));
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 728d963a916f5..1d596c89c9113 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -69,7 +69,6 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
@@ -136,6 +135,11 @@ using namespace SwitchCG;
/// some float libcalls (6, 8 or 12 bits).
static unsigned LimitFloatPrecision;
+static cl::opt<bool>
+ InsertAssertAlign("insert-assert-align", cl::init(true),
+ cl::desc("Insert the experimental `assertalign` node."),
+ cl::ReallyHidden);
+
static cl::opt<unsigned, true>
LimitFPPrecision("limit-float-precision",
cl::desc("Generate low-precision inline sequences "
@@ -206,12 +210,17 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
MVT PartVT, EVT ValueVT, const Value *V,
Optional<CallingConv::ID> CC = None,
Optional<ISD::NodeType> AssertOp = None) {
+ // Let the target assemble the parts if it wants to
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts,
+ PartVT, ValueVT, CC))
+ return Val;
+
if (ValueVT.isVector())
return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
CC);
assert(NumParts > 0 && "No parts to assemble!");
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Val = Parts[0];
if (NumParts > 1) {
@@ -347,7 +356,7 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
const char *AsmError = ", possible invalid constraint for vector type";
if (const CallInst *CI = dyn_cast<CallInst>(I))
- if (isa<InlineAsm>(CI->getCalledValue()))
+ if (CI->isInlineAsm())
return Ctx.emitError(I, ErrMsg + AsmError);
return Ctx.emitError(I, ErrMsg);
@@ -415,10 +424,13 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
// intermediate operands.
EVT BuiltVectorTy =
- EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(),
- (IntermediateVT.isVector()
- ? IntermediateVT.getVectorNumElements() * NumParts
- : NumIntermediates));
+ IntermediateVT.isVector()
+ ? EVT::getVectorVT(
+ *DAG.getContext(), IntermediateVT.getScalarType(),
+ IntermediateVT.getVectorElementCount() * NumParts)
+ : EVT::getVectorVT(*DAG.getContext(),
+ IntermediateVT.getScalarType(),
+ NumIntermediates);
Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
: ISD::BUILD_VECTOR,
DL, BuiltVectorTy, Ops);
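
The BuiltVectorTy computation above relies on ElementCount arithmetic so that scalable intermediates scale correctly when concatenated across parts. A standalone sketch using the ElementCount form this change targets (public Min/Scalable members and an unsigned multiply), with invented counts.

    #include "llvm/Support/TypeSize.h"
    #include <cassert>

    int main() {
      // A <vscale x 2 x i32> intermediate concatenated across 4 parts.
      llvm::ElementCount EC(2, /*Scalable=*/true);
      llvm::ElementCount Wider = EC * 4;
      assert(Wider.Min == 8 && Wider.Scalable);
      return 0;
    }
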
@@ -436,18 +448,20 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
// elements we want.
if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
- assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
+ assert((PartEVT.getVectorElementCount().Min >
+ ValueVT.getVectorElementCount().Min) &&
+ (PartEVT.getVectorElementCount().Scalable ==
+ ValueVT.getVectorElementCount().Scalable) &&
"Cannot narrow, it would be a lossy transformation");
- return DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+ DAG.getVectorIdxConstant(0, DL));
}
// Vector/Vector bitcast.
if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
- assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
+ assert(PartEVT.getVectorElementCount() == ValueVT.getVectorElementCount() &&
"Cannot handle this kind of promotion");
// Promoted vector extract
return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
@@ -472,9 +486,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
ValueVT.getVectorElementType(), Elts);
Val = DAG.getBitcast(WiderVecType, Val);
- return DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+ DAG.getVectorIdxConstant(0, DL));
}
diagnosePossiblyInvalidConstraint(
@@ -484,9 +497,14 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// Handle cases such as i8 -> <1 x i1>
EVT ValueSVT = ValueVT.getVectorElementType();
- if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
- Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
- : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
+ if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) {
+ if (ValueSVT.getSizeInBits() == PartEVT.getSizeInBits())
+ Val = DAG.getNode(ISD::BITCAST, DL, ValueSVT, Val);
+ else
+ Val = ValueVT.isFloatingPoint()
+ ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
+ : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
+ }
return DAG.getBuildVector(ValueVT, DL, Val);
}
@@ -504,6 +522,11 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
const Value *V,
Optional<CallingConv::ID> CallConv = None,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+ // Let the target split the parts if it wants to
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT,
+ CallConv))
+ return;
EVT ValueVT = Val.getValueType();
// Handle the vector case separately.
@@ -633,7 +656,7 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
static SDValue widenVectorToPartType(SelectionDAG &DAG,
SDValue Val, const SDLoc &DL, EVT PartVT) {
- if (!PartVT.isVector())
+ if (!PartVT.isFixedLengthVector())
return SDValue();
EVT ValueVT = Val.getValueType();
@@ -679,16 +702,16 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
Val = Widened;
} else if (PartVT.isVector() &&
PartEVT.getVectorElementType().bitsGE(
- ValueVT.getVectorElementType()) &&
- PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
+ ValueVT.getVectorElementType()) &&
+ PartEVT.getVectorElementCount() ==
+ ValueVT.getVectorElementCount()) {
// Promoted vector extract
Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
} else {
if (ValueVT.getVectorNumElements() == 1) {
- Val = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
+ DAG.getVectorIdxConstant(0, DL));
} else {
assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
"lossy conversion of vector to scalar type");
@@ -723,15 +746,18 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
- unsigned IntermediateNumElts = IntermediateVT.isVector() ?
- IntermediateVT.getVectorNumElements() : 1;
+ assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() &&
+ "Mixing scalable and fixed vectors when copying in parts");
- // Convert the vector to the appropriate type if necessary.
- unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts;
+ ElementCount DestEltCnt;
+
+ if (IntermediateVT.isVector())
+ DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates;
+ else
+ DestEltCnt = ElementCount(NumIntermediates, false);
EVT BuiltVectorTy = EVT::getVectorVT(
- *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
- MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+ *DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt);
if (ValueVT != BuiltVectorTy) {
if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy))
Val = Widened;
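// For illustration only (separate from the diff above): the element-count
// arithmetic used when building BuiltVectorTy scales the known-minimum lane
// count and preserves the scalable flag. A standalone sketch with an
// illustrative struct, not LLVM's actual ElementCount class:
#include <cassert>
#include <cstdio>

struct EltCnt {
  unsigned Min;  // known minimum number of elements
  bool Scalable; // true: real count is Min * vscale, only known at run time
};

// Copying a value in NumParts intermediate pieces multiplies the minimum
// element count, e.g. 4 parts of <vscale x 2 x i64> cover <vscale x 8 x i64>.
static EltCnt operator*(EltCnt EC, unsigned NumParts) {
  return {EC.Min * NumParts, EC.Scalable};
}

int main() {
  EltCnt Intermediate{2, /*Scalable=*/true};
  EltCnt Built = Intermediate * 4;
  assert(Built.Min == 8 && Built.Scalable);
  std::printf("<%s%u x i64>\n", Built.Scalable ? "vscale x " : "", Built.Min);
}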
@@ -743,12 +769,15 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
SmallVector<SDValue, 8> Ops(NumIntermediates);
for (unsigned i = 0; i != NumIntermediates; ++i) {
if (IntermediateVT.isVector()) {
- Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
- DAG.getConstant(i * IntermediateNumElts, DL, IdxVT));
+ // This does something sensible for scalable vectors - see the
+ // definition of EXTRACT_SUBVECTOR for further details.
+ unsigned IntermediateNumElts = IntermediateVT.getVectorMinNumElements();
+ Ops[i] =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
+ DAG.getVectorIdxConstant(i * IntermediateNumElts, DL));
} else {
- Ops[i] = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
- DAG.getConstant(i, DL, IdxVT));
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
+ DAG.getVectorIdxConstant(i, DL));
}
}
@@ -1112,32 +1141,26 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
visit(I.getOpcode(), I);
if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) {
- // Propagate the fast-math-flags of this IR instruction to the DAG node that
- // maps to this instruction.
- // TODO: We could handle all flags (nsw, etc) here.
- // TODO: If an IR instruction maps to >1 node, only the final node will have
- // flags set.
- if (SDNode *Node = getNodeForIRValue(&I)) {
- SDNodeFlags IncomingFlags;
- IncomingFlags.copyFMF(*FPMO);
- if (!Node->getFlags().isDefined())
- Node->setFlags(IncomingFlags);
- else
- Node->intersectFlagsWith(IncomingFlags);
- }
- }
- // Constrained FP intrinsics with fpexcept.ignore should also get
- // the NoFPExcept flag.
- if (auto *FPI = dyn_cast<ConstrainedFPIntrinsic>(&I))
- if (FPI->getExceptionBehavior() == fp::ExceptionBehavior::ebIgnore)
+ // ConstrainedFPIntrinsics handle their own FMF.
+ if (!isa<ConstrainedFPIntrinsic>(&I)) {
+ // Propagate the fast-math-flags of this IR instruction to the DAG node that
+ // maps to this instruction.
+ // TODO: We could handle all flags (nsw, etc) here.
+ // TODO: If an IR instruction maps to >1 node, only the final node will have
+ // flags set.
if (SDNode *Node = getNodeForIRValue(&I)) {
- SDNodeFlags Flags = Node->getFlags();
- Flags.setNoFPExcept(true);
- Node->setFlags(Flags);
+ SDNodeFlags IncomingFlags;
+ IncomingFlags.copyFMF(*FPMO);
+ if (!Node->getFlags().isDefined())
+ Node->setFlags(IncomingFlags);
+ else
+ Node->intersectFlagsWith(IncomingFlags);
}
+ }
+ }
if (!I.isTerminator() && !HasTailCall &&
- !isStatepoint(&I)) // statepoints handle their exports internally
+ !isa<GCStatepointInst>(I)) // statepoints handle their exports internally
CopyToExportRegsIfNeeded(&I);
CurInst = nullptr;
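// For illustration only (separate from the diff above): the flag handling in
// this hunk either adopts the IR instruction's fast-math flags or, if the DAG
// node already has flags, keeps only those common to both. A standalone sketch
// of that merge with a plain bitmask and illustrative names (not SDNodeFlags):
#include <cassert>
#include <cstdint>

enum : uint32_t { FMF_NoNaNs = 1, FMF_NoInfs = 2, FMF_Reassoc = 4 };

struct Flags {
  bool Defined = false;
  uint32_t Bits = 0;
  void merge(uint32_t Incoming) {
    if (!Defined) {
      Bits = Incoming; // first node for this value: adopt the incoming flags
      Defined = true;
    } else {
      Bits &= Incoming; // already flagged: intersect, keep only shared flags
    }
  }
};

int main() {
  Flags F;
  F.merge(FMF_NoNaNs | FMF_Reassoc);
  F.merge(FMF_NoNaNs | FMF_NoInfs);
  assert(F.Bits == FMF_NoNaNs);
}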
@@ -1399,11 +1422,11 @@ void SelectionDAGBuilder::resolveOrClearDbgInfo() {
/// getCopyFromRegs - If there was virtual register allocated for the value V
/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
- DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
+ DenseMap<const Value *, Register>::iterator It = FuncInfo.ValueMap.find(V);
SDValue Result;
if (It != FuncInfo.ValueMap.end()) {
- unsigned InReg = It->second;
+ Register InReg = It->second;
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
DAG.getDataLayout(), InReg, Ty,
@@ -1437,12 +1460,6 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
return Val;
}
-// Return true if SDValue exists for the given Value
-bool SelectionDAGBuilder::findValue(const Value *V) const {
- return (NodeMap.find(V) != NodeMap.end()) ||
- (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end());
-}
-
/// getNonRegisterValue - Return an SDValue for the given Value, but
/// don't look in FuncInfo.ValueMap for a virtual register.
SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
@@ -1486,6 +1503,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
TLI.getPointerTy(DAG.getDataLayout(), AS));
}
+ if (match(C, m_VScale(DAG.getDataLayout())))
+ return DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1));
+
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);
@@ -1558,16 +1578,17 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
return DAG.getBlockAddress(BA, VT);
VectorType *VecTy = cast<VectorType>(V->getType());
- unsigned NumElements = VecTy->getNumElements();
// Now that we know the number and type of the elements, get that number of
// elements into the Ops array based on what kind of constant it is.
- SmallVector<SDValue, 16> Ops;
if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
+ SmallVector<SDValue, 16> Ops;
+ unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
for (unsigned i = 0; i != NumElements; ++i)
Ops.push_back(getValue(CV->getOperand(i)));
- } else {
- assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
+
+ return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
+ } else if (isa<ConstantAggregateZero>(C)) {
EVT EltVT =
TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());
@@ -1576,11 +1597,16 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
else
Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
- Ops.assign(NumElements, Op);
- }
- // Create a BUILD_VECTOR node.
- return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
+ if (isa<ScalableVectorType>(VecTy))
+ return NodeMap[V] = DAG.getSplatVector(VT, getCurSDLoc(), Op);
+ else {
+ SmallVector<SDValue, 16> Ops;
+ Ops.assign(cast<FixedVectorType>(VecTy)->getNumElements(), Op);
+ return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
+ }
+ }
+ llvm_unreachable("Unknown vector constant");
}
// If this is a static alloca, generate it as the frameindex instead of
@@ -1603,6 +1629,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
}
+ if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V)) {
+ return DAG.getMDNode(cast<MDNode>(MD->getMetadata()));
+ }
llvm_unreachable("Can't get register for value!");
}
@@ -1611,17 +1640,12 @@ void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
bool IsSEH = isAsynchronousEHPersonality(Pers);
- bool IsWasmCXX = Pers == EHPersonality::Wasm_CXX;
MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
if (!IsSEH)
CatchPadMBB->setIsEHScopeEntry();
// In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
if (IsMSVCCXX || IsCoreCLR)
CatchPadMBB->setIsEHFuncletEntry();
- // Wasm does not need catchpads anymore
- if (!IsWasmCXX)
- DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other,
- getControlRoot()));
}
void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
@@ -1835,6 +1859,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
unsigned NumValues = ValueVTs.size();
SmallVector<SDValue, 4> Chains(NumValues);
+ Align BaseAlign = DL.getPrefTypeAlign(I.getOperand(0)->getType());
for (unsigned i = 0; i != NumValues; ++i) {
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
@@ -1843,9 +1868,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
SDValue Val = RetOp.getValue(RetOp.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
- Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val,
+ Chains[i] = DAG.getStore(
+ Chain, getCurSDLoc(), Val,
// FIXME: better loc info would be nice.
- Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+ Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
+ commonAlignment(BaseAlign, Offsets[i]));
}
Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
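// For illustration only (separate from the diff above): the per-part store
// alignment added here is the alignment of (aligned base + offset), i.e. the
// largest power of two dividing both the base alignment and the offset. A
// standalone sketch of that computation (mirroring what commonAlignment does):
#include <cassert>
#include <cstdint>

static uint64_t commonAlign(uint64_t BaseAlign, uint64_t Offset) {
  if (Offset == 0)
    return BaseAlign;
  uint64_t Bits = BaseAlign | Offset;
  return Bits & (~Bits + 1); // lowest set bit of (BaseAlign | Offset)
}

int main() {
  assert(commonAlign(16, 0) == 16); // first part keeps the base alignment
  assert(commonAlign(16, 8) == 8);  // part at offset 8 is only 8-aligned
  assert(commonAlign(16, 4) == 4);
}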
@@ -1964,7 +1991,7 @@ void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
if (V->getType()->isEmptyTy())
return;
- DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ DenseMap<const Value *, Register>::iterator VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
assert(!V->use_empty() && "Unused value assigned virtual registers!");
CopyValueToVirtualRegister(V, VMI->second);
@@ -2277,7 +2304,9 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// If this is a series of conditions that are or'd or and'd together, emit
// this as a sequence of branches instead of setcc's with and/or operations.
- // As long as jumps are not expensive, this should improve performance.
+ // As long as jumps are not expensive (exceptions for multi-use logic ops,
+ // unpredictable branches, and vector extracts because those jumps are likely
+ // expensive for any target), this should improve performance.
// For example, instead of something like:
// cmp A, B
// C = seteq
@@ -2292,9 +2321,12 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// jle foo
if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
Instruction::BinaryOps Opcode = BOp->getOpcode();
+ Value *Vec, *BOp0 = BOp->getOperand(0), *BOp1 = BOp->getOperand(1);
if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
!I.hasMetadata(LLVMContext::MD_unpredictable) &&
- (Opcode == Instruction::And || Opcode == Instruction::Or)) {
+ (Opcode == Instruction::And || Opcode == Instruction::Or) &&
+ !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
+ match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
Opcode,
getEdgeProbability(BrMBB, Succ0MBB),
@@ -2516,7 +2548,7 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
MachineMemOperand::MODereferenceable;
MachineMemOperand *MemRef = MF.getMachineMemOperand(
- MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy));
+ MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlign(PtrTy));
DAG.setNodeMemRefs(Node, {MemRef});
}
if (PtrTy != PtrMemTy)
@@ -2597,17 +2629,13 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
MachineMemOperand::MOVolatile);
}
- // Perform the comparison via a subtract/getsetcc.
- EVT VT = Guard.getValueType();
- SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal);
-
+ // Perform the comparison via a getsetcc.
SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(),
- Sub.getValueType()),
- Sub, DAG.getConstant(0, dl, VT), ISD::SETNE);
+ Guard.getValueType()),
+ Guard, GuardVal, ISD::SETNE);
- // If the sub is not 0, then we know the guard/stackslot do not equal, so
- // branch to failure MBB.
+ // If the guard/stackslot do not equal, branch to failure MBB.
SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, GuardVal.getOperand(0),
Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
@@ -2640,6 +2668,11 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
// Passing 'true' for doesNotReturn above won't generate the trap for us.
if (TM.getTargetTriple().isPS4CPU())
Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
+ // WebAssembly needs an unreachable instruction after a non-returning call,
+ // because the function return type can be different from __stack_chk_fail's
+ // return type (void).
+ if (TM.getTargetTriple().isWasm())
+ Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
DAG.setRoot(Chain);
}
@@ -2778,14 +2811,16 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt,
+ LLVMContext::OB_gc_transition,
+ LLVMContext::OB_gc_live,
LLVMContext::OB_funclet,
LLVMContext::OB_cfguardtarget}) &&
"Cannot lower invokes with arbitrary operand bundles yet!");
- const Value *Callee(I.getCalledValue());
+ const Value *Callee(I.getCalledOperand());
const Function *Fn = dyn_cast<Function>(Callee);
if (isa<InlineAsm>(Callee))
- visitInlineAsm(&I);
+ visitInlineAsm(I);
else if (Fn && Fn->isIntrinsic()) {
switch (Fn->getIntrinsicID()) {
default:
@@ -2795,10 +2830,10 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
break;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
- visitPatchpoint(&I, EHPadBB);
+ visitPatchpoint(I, EHPadBB);
break;
case Intrinsic::experimental_gc_statepoint:
- LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
+ LowerStatepoint(cast<GCStatepointInst>(I), EHPadBB);
break;
case Intrinsic::wasm_rethrow_in_catch: {
// This is usually done in visitTargetIntrinsic, but this intrinsic is
@@ -2822,14 +2857,14 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
// with deopt state.
LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
} else {
- LowerCallTo(&I, getValue(Callee), false, EHPadBB);
+ LowerCallTo(I, getValue(Callee), false, EHPadBB);
}
// If the value of the invoke is used outside of its defining block, make it
// available as a virtual register.
// We already took care of the exported value for the statepoint instruction
// during call to the LowerStatepoint.
- if (!isStatepoint(I)) {
+ if (!isa<GCStatepointInst>(I)) {
CopyToExportRegsIfNeeded(&I);
}
@@ -2862,18 +2897,19 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
{LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
"Cannot lower callbrs with arbitrary operand bundles yet!");
- assert(isa<InlineAsm>(I.getCalledValue()) &&
- "Only know how to handle inlineasm callbr");
- visitInlineAsm(&I);
+ assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
+ visitInlineAsm(I);
+ CopyToExportRegsIfNeeded(&I);
// Retrieve successors.
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()];
// Update successor info.
- addSuccessorWithProb(CallBrMBB, Return);
+ addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
MachineBasicBlock *Target = FuncInfo.MBBMap[I.getIndirectDest(i)];
- addSuccessorWithProb(CallBrMBB, Target);
+ addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+ Target->setIsInlineAsmBrIndirectTarget();
}
CallBrMBB->normalizeSuccProbs();
@@ -3003,133 +3039,6 @@ void SelectionDAGBuilder::visitFSub(const User &I) {
visitBinary(I, ISD::FSUB);
}
-/// Checks if the given instruction performs a vector reduction, in which case
-/// we have the freedom to alter the elements in the result as long as the
-/// reduction of them stays unchanged.
-static bool isVectorReductionOp(const User *I) {
- const Instruction *Inst = dyn_cast<Instruction>(I);
- if (!Inst || !Inst->getType()->isVectorTy())
- return false;
-
- auto OpCode = Inst->getOpcode();
- switch (OpCode) {
- case Instruction::Add:
- case Instruction::Mul:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- break;
- case Instruction::FAdd:
- case Instruction::FMul:
- if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
- if (FPOp->getFastMathFlags().isFast())
- break;
- LLVM_FALLTHROUGH;
- default:
- return false;
- }
-
- unsigned ElemNum = Inst->getType()->getVectorNumElements();
- // Ensure the reduction size is a power of 2.
- if (!isPowerOf2_32(ElemNum))
- return false;
-
- unsigned ElemNumToReduce = ElemNum;
-
- // Do DFS search on the def-use chain from the given instruction. We only
- // allow four kinds of operations during the search until we reach the
- // instruction that extracts the first element from the vector:
- //
- // 1. The reduction operation of the same opcode as the given instruction.
- //
- // 2. PHI node.
- //
- // 3. ShuffleVector instruction together with a reduction operation that
- // does a partial reduction.
- //
- // 4. ExtractElement that extracts the first element from the vector, and we
- // stop searching the def-use chain here.
- //
- // 3 & 4 above perform a reduction on all elements of the vector. We push defs
- // from 1-3 to the stack to continue the DFS. The given instruction is not
- // a reduction operation if we meet any other instructions other than those
- // listed above.
-
- SmallVector<const User *, 16> UsersToVisit{Inst};
- SmallPtrSet<const User *, 16> Visited;
- bool ReduxExtracted = false;
-
- while (!UsersToVisit.empty()) {
- auto User = UsersToVisit.back();
- UsersToVisit.pop_back();
- if (!Visited.insert(User).second)
- continue;
-
- for (const auto *U : User->users()) {
- auto Inst = dyn_cast<Instruction>(U);
- if (!Inst)
- return false;
-
- if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) {
- if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
- if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast())
- return false;
- UsersToVisit.push_back(U);
- } else if (const ShuffleVectorInst *ShufInst =
- dyn_cast<ShuffleVectorInst>(U)) {
- // Detect the following pattern: A ShuffleVector instruction together
- // with a reduction that do partial reduction on the first and second
- // ElemNumToReduce / 2 elements, and store the result in
- // ElemNumToReduce / 2 elements in another vector.
-
- unsigned ResultElements = ShufInst->getType()->getVectorNumElements();
- if (ResultElements < ElemNum)
- return false;
-
- if (ElemNumToReduce == 1)
- return false;
- if (!isa<UndefValue>(U->getOperand(1)))
- return false;
- for (unsigned i = 0; i < ElemNumToReduce / 2; ++i)
- if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2))
- return false;
- for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i)
- if (ShufInst->getMaskValue(i) != -1)
- return false;
-
- // There is only one user of this ShuffleVector instruction, which
- // must be a reduction operation.
- if (!U->hasOneUse())
- return false;
-
- auto U2 = dyn_cast<Instruction>(*U->user_begin());
- if (!U2 || U2->getOpcode() != OpCode)
- return false;
-
- // Check operands of the reduction operation.
- if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) ||
- (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) {
- UsersToVisit.push_back(U2);
- ElemNumToReduce /= 2;
- } else
- return false;
- } else if (isa<ExtractElementInst>(U)) {
- // At this moment we should have reduced all elements in the vector.
- if (ElemNumToReduce != 1)
- return false;
-
- const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1));
- if (!Val || !Val->isZero())
- return false;
-
- ReduxExtracted = true;
- } else
- return false;
- }
- }
- return ReduxExtracted;
-}
-
void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
SDNodeFlags Flags;
@@ -3148,17 +3057,6 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) {
Flags.setExact(ExactOp->isExact());
}
- if (isVectorReductionOp(&I)) {
- Flags.setVectorReduction(true);
- LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
-
- // If no flags are set we will propagate the incoming flags, if any flags
- // are set, we will intersect them with the incoming flag and so we need to
- // copy the FMF flags here.
- if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) {
- Flags.copyFMF(*FPOp);
- }
- }
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
@@ -3296,9 +3194,9 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
SDValue Cond = getValue(I.getOperand(0));
SDValue LHSVal = getValue(I.getOperand(1));
SDValue RHSVal = getValue(I.getOperand(2));
- auto BaseOps = {Cond};
- ISD::NodeType OpCode = Cond.getValueType().isVector() ?
- ISD::VSELECT : ISD::SELECT;
+ SmallVector<SDValue, 1> BaseOps(1, Cond);
+ ISD::NodeType OpCode =
+ Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT;
bool IsUnaryAbs = false;
@@ -3381,13 +3279,13 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
OpCode = Opc;
LHSVal = getValue(LHS);
RHSVal = getValue(RHS);
- BaseOps = {};
+ BaseOps.clear();
}
if (IsUnaryAbs) {
OpCode = Opc;
LHSVal = getValue(LHS);
- BaseOps = {};
+ BaseOps.clear();
}
}
@@ -3577,19 +3475,22 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) {
void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1));
- Constant *MaskV = cast<Constant>(I.getOperand(2));
+ ArrayRef<int> Mask;
+ if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
+ Mask = SVI->getShuffleMask();
+ else
+ Mask = cast<ConstantExpr>(I).getShuffleMask();
SDLoc DL = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
EVT SrcVT = Src1.getValueType();
- unsigned SrcNumElts = SrcVT.getVectorNumElements();
- if (MaskV->isNullValue() && VT.isScalableVector()) {
+ if (all_of(Mask, [](int Elem) { return Elem == 0; }) &&
+ VT.isScalableVector()) {
// Canonical splat form of first element of first input vector.
- SDValue FirstElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- SrcVT.getScalarType(), Src1,
- DAG.getConstant(0, DL,
- TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue FirstElt =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcVT.getScalarType(), Src1,
+ DAG.getVectorIdxConstant(0, DL));
setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt));
return;
}
@@ -3599,8 +3500,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// for targets that support a SPLAT_VECTOR for non-scalable vector types.
assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle");
- SmallVector<int, 8> Mask;
- ShuffleVectorInst::getShuffleMask(MaskV, Mask);
+ unsigned SrcNumElts = SrcVT.getVectorNumElements();
unsigned MaskNumElts = Mask.size();
if (SrcNumElts == MaskNumElts) {
@@ -3683,9 +3583,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// If the concatenated vector was padded, extract a subvector with the
// correct number of elements.
if (MaskNumElts != PaddedMaskNumElts)
- Result = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
+ DAG.getVectorIdxConstant(0, DL));
setValue(&I, Result);
return;
@@ -3729,10 +3628,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
if (StartIdx[Input] < 0)
Src = DAG.getUNDEF(VT);
else {
- Src = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
- DAG.getConstant(StartIdx[Input], DL,
- TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
+ DAG.getVectorIdxConstant(StartIdx[Input], DL));
}
}
@@ -3754,7 +3651,6 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
   // replacing the shuffle with per-element extracts from the source vectors
   // and a build vector of the extracted elements.
EVT EltVT = VT.getVectorElementType();
- EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
SmallVector<SDValue,8> Ops;
for (int Idx : Mask) {
SDValue Res;
@@ -3765,8 +3661,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
- Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- EltVT, Src, DAG.getConstant(Idx, DL, IdxVT));
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src,
+ DAG.getVectorIdxConstant(Idx, DL));
}
Ops.push_back(Res);
@@ -3882,13 +3778,18 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// Normalize Vector GEP - all scalar operands should be converted to the
// splat vector.
- unsigned VectorWidth = I.getType()->isVectorTy() ?
- I.getType()->getVectorNumElements() : 0;
+ bool IsVectorGEP = I.getType()->isVectorTy();
+ ElementCount VectorElementCount =
+ IsVectorGEP ? cast<VectorType>(I.getType())->getElementCount()
+ : ElementCount(0, false);
- if (VectorWidth && !N.getValueType().isVector()) {
+ if (IsVectorGEP && !N.getValueType().isVector()) {
LLVMContext &Context = *DAG.getContext();
- EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
- N = DAG.getSplatBuildVector(VT, dl, N);
+ EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount);
+ if (VectorElementCount.Scalable)
+ N = DAG.getSplatVector(VT, dl, N);
+ else
+ N = DAG.getSplatBuildVector(VT, dl, N);
}
for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
@@ -3910,9 +3811,16 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
DAG.getConstant(Offset, dl, N.getValueType()), Flags);
}
} else {
+ // IdxSize is the width of the arithmetic according to IR semantics.
+ // In SelectionDAG, we may prefer to do arithmetic in a wider bitwidth
+ // (and fix up the result later).
unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
MVT IdxTy = MVT::getIntegerVT(IdxSize);
- APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType()));
+ TypeSize ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+ // We intentionally mask away the high bits here; ElementSize may not
+ // fit in IdxTy.
+ APInt ElementMul(IdxSize, ElementSize.getKnownMinSize());
+ bool ElementScalable = ElementSize.isScalable();
// If this is a scalar constant or a splat vector of constants,
// handle it quickly.
@@ -3920,14 +3828,18 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (C && isa<VectorType>(C->getType()))
C = C->getSplatValue();
- if (const auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
- if (CI->isZero())
- continue;
- APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize);
+ const auto *CI = dyn_cast_or_null<ConstantInt>(C);
+ if (CI && CI->isZero())
+ continue;
+ if (CI && !ElementScalable) {
+ APInt Offs = ElementMul * CI->getValue().sextOrTrunc(IdxSize);
LLVMContext &Context = *DAG.getContext();
- SDValue OffsVal = VectorWidth ?
- DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) :
- DAG.getConstant(Offs, dl, IdxTy);
+ SDValue OffsVal;
+ if (IsVectorGEP)
+ OffsVal = DAG.getConstant(
+ Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorElementCount));
+ else
+ OffsVal = DAG.getConstant(Offs, dl, IdxTy);
// In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
@@ -3941,31 +3853,45 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
continue;
}
- // N = N + Idx * ElementSize;
+ // N = N + Idx * ElementMul;
SDValue IdxN = getValue(Idx);
- if (!IdxN.getValueType().isVector() && VectorWidth) {
- EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
- IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
+ if (!IdxN.getValueType().isVector() && IsVectorGEP) {
+ EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(),
+ VectorElementCount);
+ if (VectorElementCount.Scalable)
+ IdxN = DAG.getSplatVector(VT, dl, IdxN);
+ else
+ IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
}
// If the index is smaller or larger than intptr_t, truncate or extend
// it.
IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
- // If this is a multiply by a power of two, turn it into a shl
- // immediately. This is a very common case.
- if (ElementSize != 1) {
- if (ElementSize.isPowerOf2()) {
- unsigned Amt = ElementSize.logBase2();
- IdxN = DAG.getNode(ISD::SHL, dl,
- N.getValueType(), IdxN,
- DAG.getConstant(Amt, dl, IdxN.getValueType()));
- } else {
- SDValue Scale = DAG.getConstant(ElementSize.getZExtValue(), dl,
- IdxN.getValueType());
- IdxN = DAG.getNode(ISD::MUL, dl,
- N.getValueType(), IdxN, Scale);
+ if (ElementScalable) {
+ EVT VScaleTy = N.getValueType().getScalarType();
+ SDValue VScale = DAG.getNode(
+ ISD::VSCALE, dl, VScaleTy,
+ DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy));
+ if (IsVectorGEP)
+ VScale = DAG.getSplatVector(N.getValueType(), dl, VScale);
+ IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale);
+ } else {
+ // If this is a multiply by a power of two, turn it into a shl
+ // immediately. This is a very common case.
+ if (ElementMul != 1) {
+ if (ElementMul.isPowerOf2()) {
+ unsigned Amt = ElementMul.logBase2();
+ IdxN = DAG.getNode(ISD::SHL, dl,
+ N.getValueType(), IdxN,
+ DAG.getConstant(Amt, dl, IdxN.getValueType()));
+ } else {
+ SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
+ IdxN.getValueType());
+ IdxN = DAG.getNode(ISD::MUL, dl,
+ N.getValueType(), IdxN, Scale);
+ }
}
}
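// For illustration only (separate from the diff above): when the indexed type
// has a scalable size, the GEP byte offset becomes Idx * (KnownMinSize *
// vscale), with vscale only known at run time, which is why the lowering emits
// ISD::VSCALE and a multiply instead of folding a constant. Standalone sketch:
#include <cassert>
#include <cstdint>

static int64_t scalableGEPOffset(int64_t Idx, uint64_t KnownMinSize,
                                 uint64_t VScale) {
  return Idx * static_cast<int64_t>(KnownMinSize * VScale);
}

int main() {
  // Indexing <vscale x 4 x i32> (known minimum size 16 bytes): with vscale = 2
  // each element block is 32 bytes, so index 3 lands 96 bytes in.
  assert(scalableGEPOffset(3, 16, 2) == 96);
}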
@@ -3991,8 +3917,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto &DL = DAG.getDataLayout();
uint64_t TySize = DL.getTypeAllocSize(Ty);
- unsigned Align =
- std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment());
+ MaybeAlign Alignment = std::max(DL.getPrefTypeAlign(Ty), I.getAlign());
SDValue AllocSize = getValue(I.getArraySize());
@@ -4007,25 +3932,26 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
// Handle alignment. If the requested alignment is less than or equal to
// the stack alignment, ignore it. If the size is greater than or equal to
// the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
- unsigned StackAlign =
- DAG.getSubtarget().getFrameLowering()->getStackAlignment();
- if (Align <= StackAlign)
- Align = 0;
+ Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign();
+ if (*Alignment <= StackAlign)
+ Alignment = None;
+ const uint64_t StackAlignMask = StackAlign.value() - 1U;
// Round the size of the allocation up to the stack alignment size
// by add SA-1 to the size. This doesn't overflow because we're computing
// an address inside an alloca.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
- DAG.getConstant(StackAlign - 1, dl, IntPtr), Flags);
+ DAG.getConstant(StackAlignMask, dl, IntPtr), Flags);
// Mask out the low bits for alignment purposes.
- AllocSize =
- DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
- DAG.getConstant(~(uint64_t)(StackAlign - 1), dl, IntPtr));
+ AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
+ DAG.getConstant(~StackAlignMask, dl, IntPtr));
- SDValue Ops[] = {getRoot(), AllocSize, DAG.getConstant(Align, dl, IntPtr)};
+ SDValue Ops[] = {
+ getRoot(), AllocSize,
+ DAG.getConstant(Alignment ? Alignment->value() : 0, dl, IntPtr)};
SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
setValue(&I, DSA);
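// For illustration only (separate from the diff above): the ADD/AND pair in
// this hunk rounds a dynamic alloca size up to the stack alignment. A
// standalone sketch of that bit trick (alignment must be a power of two):
#include <cassert>
#include <cstdint>

static uint64_t alignUp(uint64_t Size, uint64_t StackAlign) {
  uint64_t Mask = StackAlign - 1; // the StackAlignMask above
  return (Size + Mask) & ~Mask;
}

int main() {
  assert(alignUp(1, 16) == 16);
  assert(alignUp(16, 16) == 16);
  assert(alignUp(17, 16) == 32);
}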
@@ -4057,13 +3983,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
SDValue Ptr = getValue(SV);
Type *Ty = I.getType();
-
- bool isVolatile = I.isVolatile();
- bool isNonTemporal = I.hasMetadata(LLVMContext::MD_nontemporal);
- bool isInvariant = I.hasMetadata(LLVMContext::MD_invariant_load);
- bool isDereferenceable =
- isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout());
- unsigned Alignment = I.getAlignment();
+ Align Alignment = I.getAlign();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
@@ -4076,6 +3996,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (NumValues == 0)
return;
+ bool isVolatile = I.isVolatile();
+
SDValue Root;
bool ConstantMemory = false;
if (isVolatile)
@@ -4109,6 +4031,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
EVT PtrVT = Ptr.getValueType();
+
+ MachineMemOperand::Flags MMOFlags
+ = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
+
unsigned ChainI = 0;
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
// Serializing loads here may result in excessive register pressure, and
@@ -4128,16 +4054,6 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
PtrVT, Ptr,
DAG.getConstant(Offsets[i], dl, PtrVT),
Flags);
- auto MMOFlags = MachineMemOperand::MONone;
- if (isVolatile)
- MMOFlags |= MachineMemOperand::MOVolatile;
- if (isNonTemporal)
- MMOFlags |= MachineMemOperand::MONonTemporal;
- if (isInvariant)
- MMOFlags |= MachineMemOperand::MOInvariant;
- if (isDereferenceable)
- MMOFlags |= MachineMemOperand::MODereferenceable;
- MMOFlags |= TLI.getMMOFlags(I);
SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A,
MachinePointerInfo(SV, Offsets[i]), Alignment,
@@ -4260,16 +4176,11 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot();
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
SDLoc dl = getCurSDLoc();
- unsigned Alignment = I.getAlignment();
+ Align Alignment = I.getAlign();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
- auto MMOFlags = MachineMemOperand::MONone;
- if (I.isVolatile())
- MMOFlags |= MachineMemOperand::MOVolatile;
- if (I.hasMetadata(LLVMContext::MD_nontemporal))
- MMOFlags |= MachineMemOperand::MONonTemporal;
- MMOFlags |= TLI.getMMOFlags(I);
+ auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
// An aggregate load cannot wrap around the address space, so offsets to its
// parts don't wrap either.
@@ -4304,25 +4215,25 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
bool IsCompressing) {
SDLoc sdl = getCurSDLoc();
- auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
- unsigned& Alignment) {
+ auto getMaskedStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
+ MaybeAlign &Alignment) {
// llvm.masked.store.*(Src0, Ptr, alignment, Mask)
Src0 = I.getArgOperand(0);
Ptr = I.getArgOperand(1);
- Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
+ Alignment = cast<ConstantInt>(I.getArgOperand(2))->getMaybeAlignValue();
Mask = I.getArgOperand(3);
};
- auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
- unsigned& Alignment) {
+ auto getCompressingStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
+ MaybeAlign &Alignment) {
// llvm.masked.compressstore.*(Src0, Ptr, Mask)
Src0 = I.getArgOperand(0);
Ptr = I.getArgOperand(1);
Mask = I.getArgOperand(2);
- Alignment = 0;
+ Alignment = None;
};
Value *PtrOperand, *MaskOperand, *Src0Operand;
- unsigned Alignment;
+ MaybeAlign Alignment;
if (IsCompressing)
getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
else
@@ -4335,19 +4246,16 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
EVT VT = Src0.getValueType();
if (!Alignment)
- Alignment = DAG.getEVTAlignment(VT);
+ Alignment = DAG.getEVTAlign(VT);
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
- MachineMemOperand *MMO =
- DAG.getMachineFunction().
- getMachineMemOperand(MachinePointerInfo(PtrOperand),
- MachineMemOperand::MOStore,
- // TODO: Make MachineMemOperands aware of scalable
- // vectors.
- VT.getStoreSize().getKnownMinSize(),
- Alignment, AAInfo);
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
+ // TODO: Make MachineMemOperands aware of scalable
+ // vectors.
+ VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo);
SDValue StoreNode =
DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
ISD::UNINDEXED, false /* Truncating */, IsCompressing);
@@ -4370,78 +4278,51 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
 // are looking for. If the first operand of the GEP is a splat vector, we
// extract the splat value and use it as a uniform base.
// In all other cases the function returns 'false'.
-static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index,
+static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
ISD::MemIndexType &IndexType, SDValue &Scale,
- SelectionDAGBuilder *SDB) {
+ SelectionDAGBuilder *SDB, const BasicBlock *CurBB) {
SelectionDAG& DAG = SDB->DAG;
- LLVMContext &Context = *DAG.getContext();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
   assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
- const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
- if (!GEP)
- return false;
-
- const Value *GEPPtr = GEP->getPointerOperand();
- if (!GEPPtr->getType()->isVectorTy())
- Ptr = GEPPtr;
- else if (!(Ptr = getSplatValue(GEPPtr)))
- return false;
-
- unsigned FinalIndex = GEP->getNumOperands() - 1;
- Value *IndexVal = GEP->getOperand(FinalIndex);
- gep_type_iterator GTI = gep_type_begin(*GEP);
- // Ensure all the other indices are 0.
- for (unsigned i = 1; i < FinalIndex; ++i, ++GTI) {
- auto *C = dyn_cast<Constant>(GEP->getOperand(i));
+ // Handle splat constant pointer.
+ if (auto *C = dyn_cast<Constant>(Ptr)) {
+ C = C->getSplatValue();
if (!C)
return false;
- if (isa<VectorType>(C->getType()))
- C = C->getSplatValue();
- auto *CI = dyn_cast_or_null<ConstantInt>(C);
- if (!CI || !CI->isZero())
- return false;
+
+ Base = SDB->getValue(C);
+
+ unsigned NumElts = cast<FixedVectorType>(Ptr->getType())->getNumElements();
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), TLI.getPointerTy(DL), NumElts);
+ Index = DAG.getConstant(0, SDB->getCurSDLoc(), VT);
+ IndexType = ISD::SIGNED_SCALED;
+ Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
+ return true;
}
- // The operands of the GEP may be defined in another basic block.
- // In this case we'll not find nodes for the operands.
- if (!SDB->findValue(Ptr))
+ const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP || GEP->getParent() != CurBB)
return false;
- Constant *C = dyn_cast<Constant>(IndexVal);
- if (!C && !SDB->findValue(IndexVal))
+
+ if (GEP->getNumOperands() != 2)
return false;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- const DataLayout &DL = DAG.getDataLayout();
- StructType *STy = GTI.getStructTypeOrNull();
-
- if (STy) {
- const StructLayout *SL = DL.getStructLayout(STy);
- if (isa<VectorType>(C->getType())) {
- C = C->getSplatValue();
- // FIXME: If getSplatValue may return nullptr for a structure?
- // If not, the following check can be removed.
- if (!C)
- return false;
- }
- auto *CI = cast<ConstantInt>(C);
- Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
- Index = DAG.getConstant(SL->getElementOffset(CI->getZExtValue()),
- SDB->getCurSDLoc(), TLI.getPointerTy(DL));
- } else {
- Scale = DAG.getTargetConstant(
- DL.getTypeAllocSize(GEP->getResultElementType()),
- SDB->getCurSDLoc(), TLI.getPointerTy(DL));
- Index = SDB->getValue(IndexVal);
- }
- Base = SDB->getValue(Ptr);
- IndexType = ISD::SIGNED_SCALED;
+ const Value *BasePtr = GEP->getPointerOperand();
+ const Value *IndexVal = GEP->getOperand(GEP->getNumOperands() - 1);
- if (STy || !Index.getValueType().isVector()) {
- unsigned GEPWidth = GEP->getType()->getVectorNumElements();
- EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
- Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index);
- }
+ // Make sure the base is scalar and the index is a vector.
+ if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy())
+ return false;
+
+ Base = SDB->getValue(BasePtr);
+ Index = SDB->getValue(IndexVal);
+ IndexType = ISD::SIGNED_SCALED;
+ Scale = DAG.getTargetConstant(
+ DL.getTypeAllocSize(GEP->getResultElementType()),
+ SDB->getCurSDLoc(), TLI.getPointerTy(DL));
return true;
}
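// For illustration only (separate from the diff above): the base/index/scale
// split produced by getUniformBase corresponds to the per-lane addressing
// addr[i] = Base + Index[i] * Scale used by masked gather/scatter nodes.
// A standalone sketch of that decomposition with made-up values:
#include <array>
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Base = 0x1000;                   // scalar pointer operand of the GEP
  std::array<int64_t, 4> Index{0, 2, 4, 6}; // vector index operand
  uint64_t Scale = 8;                       // alloc size of the indexed type

  std::array<uint64_t, 4> Addr;
  for (size_t i = 0; i < Addr.size(); ++i)
    Addr[i] = Base + static_cast<uint64_t>(Index[i]) * Scale;

  assert(Addr[1] == 0x1010 && Addr[3] == 0x1030);
}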
@@ -4453,9 +4334,9 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDValue Src0 = getValue(I.getArgOperand(0));
SDValue Mask = getValue(I.getArgOperand(3));
EVT VT = Src0.getValueType();
- unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
- if (!Alignment)
- Alignment = DAG.getEVTAlignment(VT);
+ Align Alignment = cast<ConstantInt>(I.getArgOperand(2))
+ ->getMaybeAlignValue()
+ .getValueOr(DAG.getEVTAlign(VT));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
AAMDNodes AAInfo;
@@ -4465,18 +4346,15 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDValue Index;
ISD::MemIndexType IndexType;
SDValue Scale;
- const Value *BasePtr = Ptr;
- bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale,
- this);
-
- const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
- MachineMemOperand::MOStore,
- // TODO: Make MachineMemOperands aware of scalable
- // vectors.
- VT.getStoreSize().getKnownMinSize(),
- Alignment, AAInfo);
+ bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
+ I.getParent());
+
+ unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(AS), MachineMemOperand::MOStore,
+ // TODO: Make MachineMemOperands aware of scalable
+ // vectors.
+ MemoryLocation::UnknownSize, Alignment, AAInfo);
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
@@ -4493,25 +4371,25 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDLoc sdl = getCurSDLoc();
- auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
- unsigned& Alignment) {
+ auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
+ MaybeAlign &Alignment) {
// @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
Ptr = I.getArgOperand(0);
- Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ Alignment = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
Mask = I.getArgOperand(2);
Src0 = I.getArgOperand(3);
};
- auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
- unsigned& Alignment) {
+ auto getExpandingLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
+ MaybeAlign &Alignment) {
// @llvm.masked.expandload.*(Ptr, Mask, Src0)
Ptr = I.getArgOperand(0);
- Alignment = 0;
+ Alignment = None;
Mask = I.getArgOperand(1);
Src0 = I.getArgOperand(2);
};
Value *PtrOperand, *MaskOperand, *Src0Operand;
- unsigned Alignment;
+ MaybeAlign Alignment;
if (IsExpanding)
getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
else
@@ -4524,7 +4402,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
EVT VT = Src0.getValueType();
if (!Alignment)
- Alignment = DAG.getEVTAlignment(VT);
+ Alignment = DAG.getEVTAlign(VT);
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
@@ -4542,14 +4420,11 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
- MachineMemOperand *MMO =
- DAG.getMachineFunction().
- getMachineMemOperand(MachinePointerInfo(PtrOperand),
- MachineMemOperand::MOLoad,
- // TODO: Make MachineMemOperands aware of scalable
- // vectors.
- VT.getStoreSize().getKnownMinSize(),
- Alignment, AAInfo, Ranges);
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+ // TODO: Make MachineMemOperands aware of scalable
+ // vectors.
+ VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges);
SDValue Load =
DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,
@@ -4569,9 +4444,9 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
- unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
- if (!Alignment)
- Alignment = DAG.getEVTAlignment(VT);
+ Align Alignment = cast<ConstantInt>(I.getArgOperand(1))
+ ->getMaybeAlignValue()
+ .getValueOr(DAG.getEVTAlign(VT));
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
@@ -4582,29 +4457,14 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
SDValue Index;
ISD::MemIndexType IndexType;
SDValue Scale;
- const Value *BasePtr = Ptr;
- bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale,
- this);
- bool ConstantMemory = false;
- if (UniformBase && AA &&
- AA->pointsToConstantMemory(
- MemoryLocation(BasePtr,
- LocationSize::precise(
- DAG.getDataLayout().getTypeStoreSize(I.getType())),
- AAInfo))) {
- // Do not serialize (non-volatile) loads of constant memory with anything.
- Root = DAG.getEntryNode();
- ConstantMemory = true;
- }
-
- MachineMemOperand *MMO =
- DAG.getMachineFunction().
- getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr),
- MachineMemOperand::MOLoad,
- // TODO: Make MachineMemOperands aware of scalable
- // vectors.
- VT.getStoreSize().getKnownMinSize(),
- Alignment, AAInfo, Ranges);
+ bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
+ I.getParent());
+ unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(AS), MachineMemOperand::MOLoad,
+ // TODO: Make MachineMemOperands aware of scalable
+ // vectors.
+ MemoryLocation::UnknownSize, Alignment, AAInfo, Ranges);
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
@@ -4616,9 +4476,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
Ops, MMO, IndexType);
- SDValue OutChain = Gather.getValue(1);
- if (!ConstantMemory)
- PendingLoads.push_back(OutChain);
+ PendingLoads.push_back(Gather.getValue(1));
setValue(&I, Gather);
}
@@ -4633,19 +4491,14 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
- auto Alignment = DAG.getEVTAlignment(MemVT);
-
- auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
- if (I.isVolatile())
- Flags |= MachineMemOperand::MOVolatile;
- Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
- Flags, MemVT.getStoreSize(), Alignment,
- AAMDNodes(), nullptr, SSID, SuccessOrdering,
- FailureOrdering);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
+ DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, SuccessOrdering,
+ FailureOrdering);
SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
dl, MemVT, VTs, InChain,
@@ -4684,18 +4537,13 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
SDValue InChain = getRoot();
auto MemVT = getValue(I.getValOperand()).getSimpleValueType();
- auto Alignment = DAG.getEVTAlignment(MemVT);
-
- auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
- if (I.isVolatile())
- Flags |= MachineMemOperand::MOVolatile;
- Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags,
- MemVT.getStoreSize(), Alignment, AAMDNodes(),
- nullptr, SSID, Ordering);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
+ DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, Ordering);
SDValue L =
DAG.getAtomic(NT, dl, MemVT, InChain,
@@ -4735,24 +4583,11 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
I.getAlignment() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
- auto Flags = MachineMemOperand::MOLoad;
- if (I.isVolatile())
- Flags |= MachineMemOperand::MOVolatile;
- if (I.hasMetadata(LLVMContext::MD_invariant_load))
- Flags |= MachineMemOperand::MOInvariant;
- if (isDereferenceablePointer(I.getPointerOperand(), I.getType(),
- DAG.getDataLayout()))
- Flags |= MachineMemOperand::MODereferenceable;
-
- Flags |= TLI.getMMOFlags(I);
-
- MachineMemOperand *MMO =
- DAG.getMachineFunction().
- getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
- Flags, MemVT.getStoreSize(),
- I.getAlignment() ? I.getAlignment() :
- DAG.getEVTAlignment(MemVT),
- AAMDNodes(), nullptr, SSID, Order);
+ auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
+ I.getAlign(), AAMDNodes(), nullptr, SSID, Order);
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
@@ -4773,7 +4608,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
PendingLoads.push_back(OutChain);
return;
}
-
+
SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
Ptr, MMO);
@@ -4800,16 +4635,12 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
if (I.getAlignment() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic store");
- auto Flags = MachineMemOperand::MOStore;
- if (I.isVolatile())
- Flags |= MachineMemOperand::MOVolatile;
- Flags |= TLI.getMMOFlags(I);
+ auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags,
- MemVT.getStoreSize(), I.getAlignment(), AAMDNodes(),
- nullptr, SSID, Ordering);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
+ I.getAlign(), AAMDNodes(), nullptr, SSID, Ordering);
SDValue Val = getValue(I.getValueOperand());
if (Val.getValueType() != MemVT)
@@ -4899,10 +4730,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// This is target intrinsic that touches memory
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
- Result = DAG.getMemIntrinsicNode(
- Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
- MachinePointerInfo(Info.ptrVal, Info.offset),
- Info.align ? Info.align->value() : 0, Info.flags, Info.size, AAInfo);
+ Result =
+ DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
+ MachinePointerInfo(Info.ptrVal, Info.offset),
+ Info.align, Info.flags, Info.size, AAInfo);
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
@@ -4926,6 +4757,15 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
} else
Result = lowerRangeToAssertZExt(DAG, I, Result);
+ MaybeAlign Alignment = I.getRetAlign();
+ if (!Alignment)
+ Alignment = F->getAttributes().getRetAlignment();
+ // Insert `assertalign` node if there's an alignment.
+ if (InsertAssertAlign && Alignment) {
+ Result =
+ DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
+ }
+
setValue(&I, Result);
}
}
@@ -5465,7 +5305,8 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
SDValue LHS, SDValue RHS, SDValue Scale,
SelectionDAG &DAG, const TargetLowering &TLI) {
EVT VT = LHS.getValueType();
- bool Signed = Opcode == ISD::SDIVFIX;
+ bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
+ bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
LLVMContext &Ctx = *DAG.getContext();
// If the type is legal but the operation isn't, this node might survive all
@@ -5477,14 +5318,16 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
// by bumping the size by one bit. This will force it to Promote, enabling the
// early expansion and avoiding the need to expand later.
- // We don't have to do this if Scale is 0; that can always be expanded.
+ // We don't have to do this if Scale is 0; that can always be expanded, unless
+ // it's a saturating signed operation. Those can experience true integer
+ // division overflow, a case which we must avoid.
// FIXME: We wouldn't have to do this (or any of the early
// expansion/promotion) if it was possible to expand a libcall of an
// illegal type during operation legalization. But it's not, so things
// get a bit hacky.
unsigned ScaleInt = cast<ConstantSDNode>(Scale)->getZExtValue();
- if (ScaleInt > 0 &&
+ if ((ScaleInt > 0 || (Saturating && Signed)) &&
(TLI.isTypeLegal(VT) ||
(VT.isVector() && TLI.isTypeLegal(VT.getVectorElementType())))) {
TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction(
@@ -5506,8 +5349,16 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
LHS = DAG.getZExtOrTrunc(LHS, DL, PromVT);
RHS = DAG.getZExtOrTrunc(RHS, DL, PromVT);
}
- // TODO: Saturation.
+ EVT ShiftTy = TLI.getShiftAmountTy(PromVT, DAG.getDataLayout());
+ // For saturating operations, we need to shift up the LHS to get the
+ // proper saturation width, and then shift down again afterwards.
+ if (Saturating)
+ LHS = DAG.getNode(ISD::SHL, DL, PromVT, LHS,
+ DAG.getConstant(1, DL, ShiftTy));
SDValue Res = DAG.getNode(Opcode, DL, PromVT, LHS, RHS, Scale);
+ if (Saturating)
+ Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, PromVT, Res,
+ DAG.getConstant(1, DL, ShiftTy));
return DAG.getZExtOrTrunc(Res, DL, VT);
}
}
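// For illustration only (separate from the diff above): as the new comment in
// this hunk notes, a saturating signed fixed-point division can hit true
// integer division overflow even at scale 0 (INT_MIN / -1), so the lowering
// must clamp rather than divide directly. Standalone sketch of that case:
#include <cassert>
#include <cstdint>
#include <limits>

static int32_t sdiv_sat(int32_t A, int32_t B) {
  if (A == std::numeric_limits<int32_t>::min() && B == -1)
    return std::numeric_limits<int32_t>::max(); // would overflow, so saturate
  return A / B;
}

int main() {
  assert(sdiv_sat(std::numeric_limits<int32_t>::min(), -1) ==
         std::numeric_limits<int32_t>::max());
  assert(sdiv_sat(-8, 2) == -4);
}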
@@ -5622,6 +5473,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
+ bool IsIndirect = false;
Optional<MachineOperand> Op;
// Some arguments' frame index is recorded during argument lowering.
int FI = FuncInfo.getArgumentFrameIndex(Arg);
@@ -5643,6 +5495,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
if (Reg) {
Op = MachineOperand::CreateReg(Reg, false);
+ IsIndirect = IsDbgDeclare;
}
}
@@ -5691,13 +5544,13 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?");
FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false,
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
RegAndSize.first, Variable, *FragmentExpr));
}
};
// Check if ValueMap has reg number.
- DenseMap<const Value *, unsigned>::const_iterator
+ DenseMap<const Value *, Register>::const_iterator
VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
const auto &TLI = DAG.getTargetLoweringInfo();
@@ -5709,6 +5562,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
Op = MachineOperand::CreateReg(VMI->second, false);
+ IsIndirect = IsDbgDeclare;
} else if (ArgRegsAndSizes.size() > 1) {
// This was split due to the calling convention, and no virtual register
// mapping exists for the value.
@@ -5722,28 +5576,9 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
-
- // If the argument arrives in a stack slot, then what the IR thought was a
- // normal Value is actually in memory, and we must add a deref to load it.
- if (Op->isFI()) {
- int FI = Op->getIndex();
- unsigned Size = DAG.getMachineFunction().getFrameInfo().getObjectSize(FI);
- if (Expr->isImplicit()) {
- SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size};
- Expr = DIExpression::prependOpcodes(Expr, Ops);
- } else {
- Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
- }
- }
-
- // If this location was specified with a dbg.declare, then it and its
- // expression calculate the address of the variable. Append a deref to
- // force it to be a memory location.
- if (IsDbgDeclare)
- Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
-
+ IsIndirect = (Op->isReg()) ? IsIndirect : true;
FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false,
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
*Op, Variable, Expr));
return true;
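Taken together, the IsIndirect plumbing above reduces to a small decision: a location that is not a register (a frame index) always describes memory, and a register location describes the variable's address only when it came from a dbg.declare. A hypothetical one-function summary of that rule:

    // Mirrors the final `IsIndirect = Op->isReg() ? IsIndirect : true;` above,
    // where the register paths set IsIndirect = IsDbgDeclare beforehand.
    bool dbgValueIsIndirect(bool IsDbgDeclare, bool LocIsReg) {
      if (!LocIsReg)
        return true;        // frame-index operand: always a memory location
      return IsDbgDeclare;  // register operand: indirect only for dbg.declare
    }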
@@ -5787,6 +5622,10 @@ static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
return ISD::SDIVFIX;
case Intrinsic::udiv_fix:
return ISD::UDIVFIX;
+ case Intrinsic::sdiv_fix_sat:
+ return ISD::SDIVFIXSAT;
+ case Intrinsic::udiv_fix_sat:
+ return ISD::UDIVFIXSAT;
default:
llvm_unreachable("Unhandled fixed point intrinsic");
}
@@ -5798,7 +5637,24 @@ void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
SDValue Callee = DAG.getExternalSymbol(
FunctionName,
DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
- LowerCallTo(&I, Callee, I.isTailCall());
+ LowerCallTo(I, Callee, I.isTailCall());
+}
+
+/// Given a @llvm.call.preallocated.setup, return the corresponding
+/// preallocated call.
+static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) {
+ assert(cast<CallBase>(PreallocatedSetup)
+ ->getCalledFunction()
+ ->getIntrinsicID() == Intrinsic::call_preallocated_setup &&
+ "expected call_preallocated_setup Value");
+ for (auto *U : PreallocatedSetup->users()) {
+ auto *UseCall = cast<CallBase>(U);
+ const Function *Fn = UseCall->getCalledFunction();
+ if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) {
+ return UseCall;
+ }
+ }
+ llvm_unreachable("expected corresponding call to preallocated setup/arg");
}
/// Lower the call to the specified intrinsic function.
@@ -5814,6 +5670,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// By default, turn this into a target intrinsic node.
visitTargetIntrinsic(I, Intrinsic);
return;
+ case Intrinsic::vscale: {
+ match(&I, m_VScale(DAG.getDataLayout()));
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ setValue(&I,
+ DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1)));
+ return;
+ }
case Intrinsic::vastart: visitVAStart(I); return;
case Intrinsic::vaend: visitVAEnd(I); return;
case Intrinsic::vacopy: visitVACopy(I); return;
@@ -5835,6 +5698,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
TLI.getFrameIndexTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return;
+ case Intrinsic::read_volatile_register:
case Intrinsic::read_register: {
Value *Reg = I.getArgOperand(0);
SDValue Chain = getRoot();
@@ -5863,16 +5727,37 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
// @llvm.memcpy defines 0 and 1 to both mean no alignment.
- unsigned DstAlign = std::max<unsigned>(MCI.getDestAlignment(), 1);
- unsigned SrcAlign = std::max<unsigned>(MCI.getSourceAlignment(), 1);
- unsigned Align = MinAlign(DstAlign, SrcAlign);
+ Align DstAlign = MCI.getDestAlign().valueOrOne();
+ Align SrcAlign = MCI.getSourceAlign().valueOrOne();
+ Align Alignment = commonAlignment(DstAlign, SrcAlign);
bool isVol = MCI.isVolatile();
- bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Align, isVol,
- false, isTC,
+ SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
+ /* AlwaysInline */ false, isTC,
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)));
+ updateDAGForMaybeTailCall(MC);
+ return;
+ }
+ case Intrinsic::memcpy_inline: {
+ const auto &MCI = cast<MemCpyInlineInst>(I);
+ SDValue Dst = getValue(I.getArgOperand(0));
+ SDValue Src = getValue(I.getArgOperand(1));
+ SDValue Size = getValue(I.getArgOperand(2));
+ assert(isa<ConstantSDNode>(Size) && "memcpy_inline needs constant size");
+ // @llvm.memcpy.inline defines 0 and 1 to both mean no alignment.
+ Align DstAlign = MCI.getDestAlign().valueOrOne();
+ Align SrcAlign = MCI.getSourceAlign().valueOrOne();
+ Align Alignment = commonAlignment(DstAlign, SrcAlign);
+ bool isVol = MCI.isVolatile();
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
+ // FIXME: Support passing different dest/src alignments to the memcpy DAG
+ // node.
+ SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol,
+ /* AlwaysInline */ true, isTC,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
updateDAGForMaybeTailCall(MC);
@@ -5884,12 +5769,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
// @llvm.memset defines 0 and 1 to both mean no alignment.
- unsigned Align = std::max<unsigned>(MSI.getDestAlignment(), 1);
+ Align Alignment = MSI.getDestAlign().valueOrOne();
bool isVol = MSI.isVolatile();
- bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Align, isVol,
- isTC, MachinePointerInfo(I.getArgOperand(0)));
+ SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC,
+ MachinePointerInfo(I.getArgOperand(0)));
updateDAGForMaybeTailCall(MS);
return;
}
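The memcpy and memset handlers above (and memmove below) now go through MaybeAlign/Align instead of raw unsigned alignments. commonAlignment of two power-of-two alignments is simply the smaller of the two, and valueOrOne() maps the "no alignment recorded" case to 1; a hypothetical scalar stand-in:

    #include <algorithm>
    #include <cstdint>

    // Stand-in for MaybeAlign::valueOrOne() followed by commonAlignment():
    // both inputs are powers of two (or 0 meaning "unknown"), and the largest
    // alignment both pointers are guaranteed to satisfy is the minimum.
    uint64_t commonAlignmentSketch(uint64_t DstAlign, uint64_t SrcAlign) {
      DstAlign = DstAlign ? DstAlign : 1;   // valueOrOne()
      SrcAlign = SrcAlign ? SrcAlign : 1;
      return std::min(DstAlign, SrcAlign);  // commonAlignment()
    }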
@@ -5899,15 +5784,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
// @llvm.memmove defines 0 and 1 to both mean no alignment.
- unsigned DstAlign = std::max<unsigned>(MMI.getDestAlignment(), 1);
- unsigned SrcAlign = std::max<unsigned>(MMI.getSourceAlignment(), 1);
- unsigned Align = MinAlign(DstAlign, SrcAlign);
+ Align DstAlign = MMI.getDestAlign().valueOrOne();
+ Align SrcAlign = MMI.getSourceAlign().valueOrOne();
+ Align Alignment = commonAlignment(DstAlign, SrcAlign);
bool isVol = MMI.isVolatile();
- bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memmove DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Align, isVol,
+ SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
updateDAGForMaybeTailCall(MM);
@@ -5923,7 +5808,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned SrcAlign = MI.getSourceAlignment();
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
- bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, DstAlign, Src,
SrcAlign, Length, LengthTy, ElemSz, isTC,
MachinePointerInfo(MI.getRawDest()),
@@ -5941,7 +5826,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned SrcAlign = MI.getSourceAlignment();
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
- bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, DstAlign, Src,
SrcAlign, Length, LengthTy, ElemSz, isTC,
MachinePointerInfo(MI.getRawDest()),
@@ -5958,13 +5843,37 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned DstAlign = MI.getDestAlignment();
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
- bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, DstAlign, Val, Length,
LengthTy, ElemSz, isTC,
MachinePointerInfo(MI.getRawDest()));
updateDAGForMaybeTailCall(MC);
return;
}
+ case Intrinsic::call_preallocated_setup: {
+ const CallBase *PreallocatedCall = FindPreallocatedCall(&I);
+ SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
+ SDValue Res = DAG.getNode(ISD::PREALLOCATED_SETUP, sdl, MVT::Other,
+ getRoot(), SrcValue);
+ setValue(&I, Res);
+ DAG.setRoot(Res);
+ return;
+ }
+ case Intrinsic::call_preallocated_arg: {
+ const CallBase *PreallocatedCall = FindPreallocatedCall(I.getOperand(0));
+ SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
+ SDValue Ops[3];
+ Ops[0] = getRoot();
+ Ops[1] = SrcValue;
+ Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl,
+ MVT::i32); // arg index
+ SDValue Res = DAG.getNode(
+ ISD::PREALLOCATED_ARG, sdl,
+ DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Ops);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return;
+ }
case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare: {
const auto &DI = cast<DbgVariableIntrinsic>(I);
@@ -5972,12 +5881,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DIExpression *Expression = DI.getExpression();
dropDanglingDebugInfo(Variable, Expression);
assert(Variable && "Missing variable");
-
+ LLVM_DEBUG(dbgs() << "SelectionDAG visiting debug intrinsic: " << DI
+ << "\n");
// Check if address has undef value.
const Value *Address = DI.getVariableLocation();
if (!Address || isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
- LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
+ << " (bad/undef/unused-arg address)\n");
return;
}
@@ -6006,6 +5917,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDDbgValue *SDV = DAG.getFrameIndexDbgValue(
Variable, Expression, FI, /*IsIndirect*/ true, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, getRoot().getNode(), isParameter);
+ } else {
+ LLVM_DEBUG(dbgs() << "Skipping " << DI
+ << " (variable info stashed in MF side table)\n");
}
return;
}
@@ -6040,7 +5954,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// virtual register info from the FuncInfo.ValueMap.
if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true,
N)) {
- LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
+ << " (could not emit func-arg dbg_value)\n");
}
}
return;
@@ -6192,6 +6107,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
+ case Intrinsic::roundeven:
case Intrinsic::canonicalize: {
unsigned Opcode;
switch (Intrinsic) {
@@ -6206,6 +6122,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::rint: Opcode = ISD::FRINT; break;
case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
case Intrinsic::round: Opcode = ISD::FROUND; break;
+ case Intrinsic::roundeven: Opcode = ISD::FROUNDEVEN; break;
case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break;
}
@@ -6269,7 +6186,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2))));
return;
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
@@ -6456,7 +6373,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
}
case Intrinsic::sdiv_fix:
- case Intrinsic::udiv_fix: {
+ case Intrinsic::udiv_fix:
+ case Intrinsic::sdiv_fix_sat:
+ case Intrinsic::udiv_fix_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
@@ -6466,9 +6385,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
case Intrinsic::stacksave: {
SDValue Op = getRoot();
- Res = DAG.getNode(
- ISD::STACKSAVE, sdl,
- DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op);
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ Res = DAG.getNode(ISD::STACKSAVE, sdl, DAG.getVTList(VT, MVT::Other), Op);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
@@ -6479,7 +6397,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
case Intrinsic::get_dynamic_area_offset: {
SDValue Op = getRoot();
- EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout());
EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
// Result type for @llvm.get.dynamic.area.offset should match PtrTy for
// target.
@@ -6493,13 +6411,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
}
case Intrinsic::stackguard: {
- EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
const Module &M = *MF.getFunction().getParent();
SDValue Chain = getRoot();
if (TLI.useLoadStackGuardNode()) {
Res = getLoadStackGuard(DAG, sdl, Chain);
} else {
+ EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
const Value *Global = TLI.getSDagStackGuard(M);
unsigned Align = DL->getPrefTypeAlignment(Global->getType());
Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
@@ -6516,7 +6434,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// Emit code into the DAG to store the stack guard onto the stack.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
- EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
SDValue Src, Chain = getRoot();
if (TLI.useLoadStackGuardNode())
@@ -6528,6 +6445,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
int FI = FuncInfo.StaticAllocaMap[Slot];
MFI.setStackProtectorIndex(FI);
+ EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout());
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
@@ -6606,7 +6524,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::gcwrite:
llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
case Intrinsic::flt_rounds:
- setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32));
+ Res = DAG.getNode(ISD::FLT_ROUNDS_, sdl, {MVT::i32, MVT::Other}, getRoot());
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
return;
case Intrinsic::expect:
@@ -6678,12 +6598,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
Ops[2] = getValue(I.getArgOperand(1));
Ops[3] = getValue(I.getArgOperand(2));
Ops[4] = getValue(I.getArgOperand(3));
- SDValue Result = DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl,
- DAG.getVTList(MVT::Other), Ops,
- EVT::getIntegerVT(*Context, 8),
- MachinePointerInfo(I.getArgOperand(0)),
- 0, /* align */
- Flags);
+ SDValue Result = DAG.getMemIntrinsicNode(
+ ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops,
+ EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)),
+ /* align */ None, Flags);
// Chain the prefetch in parallel with any pending loads, to stay out of
// the way of later optimizations.
@@ -6750,10 +6668,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
- visitPatchpoint(&I);
+ visitPatchpoint(I);
return;
case Intrinsic::experimental_gc_statepoint:
- LowerStatepoint(ImmutableStatepoint(&I));
+ LowerStatepoint(cast<GCStatepointInst>(I));
return;
case Intrinsic::experimental_gc_result:
visitGCResult(cast<GCResultInst>(I));
@@ -6794,7 +6712,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::localrecover: {
// i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx)
MachineFunction &MF = DAG.getMachineFunction();
- MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout(), 0);
// Get the symbol that defines the frame offset.
auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
@@ -6805,6 +6722,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal);
+ Value *FP = I.getArgOperand(1);
+ SDValue FPVal = getValue(FP);
+ EVT PtrVT = FPVal.getValueType();
+
// Create a MCSymbol for the label to avoid any target lowering
// that would make this PC relative.
SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT);
@@ -6812,8 +6733,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym);
// Add the offset to the FP.
- Value *FP = I.getArgOperand(1);
- SDValue FPVal = getValue(FP);
SDValue Add = DAG.getMemBasePlusOffset(FPVal, OffsetVal, sdl);
setValue(&I, Add);
@@ -6996,11 +6915,42 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Ptr = getValue(I.getOperand(0));
SDValue Const = getValue(I.getOperand(1));
- EVT DestVT =
- EVT(DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
+ EVT PtrVT = Ptr.getValueType();
+ setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), PtrVT, Ptr,
+ DAG.getZExtOrTrunc(Const, getCurSDLoc(), PtrVT)));
+ return;
+ }
+ case Intrinsic::get_active_lane_mask: {
+ auto DL = getCurSDLoc();
+ SDValue Index = getValue(I.getOperand(0));
+ SDValue BTC = getValue(I.getOperand(1));
+ Type *ElementTy = I.getOperand(0)->getType();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ unsigned VecWidth = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 16> OpsBTC;
+ SmallVector<SDValue, 16> OpsIndex;
+ SmallVector<SDValue, 16> OpsStepConstants;
+ for (unsigned i = 0; i < VecWidth; i++) {
+ OpsBTC.push_back(BTC);
+ OpsIndex.push_back(Index);
+ OpsStepConstants.push_back(DAG.getConstant(i, DL, MVT::getVT(ElementTy)));
+ }
- setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), DestVT, Ptr,
- DAG.getZExtOrTrunc(Const, getCurSDLoc(), DestVT)));
+ EVT CCVT = MVT::i1;
+ CCVT = EVT::getVectorVT(I.getContext(), CCVT, VecWidth);
+
+ auto VecTy = MVT::getVT(FixedVectorType::get(ElementTy, VecWidth));
+ SDValue VectorIndex = DAG.getBuildVector(VecTy, DL, OpsIndex);
+ SDValue VectorStep = DAG.getBuildVector(VecTy, DL, OpsStepConstants);
+ SDValue VectorInduction = DAG.getNode(
+ ISD::UADDO, DL, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep);
+ SDValue VectorBTC = DAG.getBuildVector(VecTy, DL, OpsBTC);
+ SDValue SetCC = DAG.getSetCC(DL, CCVT, VectorInduction.getValue(0),
+ VectorBTC, ISD::CondCode::SETULE);
+ setValue(&I, DAG.getNode(ISD::AND, DL, CCVT,
+ DAG.getNOT(DL, VectorInduction.getValue(1), CCVT),
+ SetCC));
return;
}
}
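The expansion above turns @llvm.get.active.lane.mask into a splat of the index, a vector add of the step constants with overflow tracking (UADDO), an unsigned <= compare against the splatted backedge-taken count, and a mask-off of any lane whose add wrapped. A scalar model of the same computation, using 64-bit lanes purely for illustration:

    #include <cstdint>
    #include <vector>

    // Lane i of the result is (Index + i) <=u BTC, forced to false when the
    // unsigned add wraps around, which is exactly what the UADDO overflow
    // bit, the SETULE and the final AND-with-NOT compute vector-wide above.
    std::vector<bool> activeLaneMaskSketch(uint64_t Index, uint64_t BTC,
                                           unsigned VecWidth) {
      std::vector<bool> Mask(VecWidth);
      for (unsigned i = 0; i < VecWidth; ++i) {
        uint64_t Lane = Index + i;           // vector UADDO, one lane
        bool Overflow = Lane < Index;        // UADDO overflow bit
        Mask[i] = !Overflow && Lane <= BTC;  // NOT(overflow) AND SETULE
      }
      return Mask;
    }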
@@ -7032,14 +6982,67 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
Opers.push_back(getValue(FPI.getArgOperand(1)));
}
+ auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) {
+ assert(Result.getNode()->getNumValues() == 2);
+
+ // Push node to the appropriate list so that future instructions can be
+ // chained up correctly.
+ SDValue OutChain = Result.getValue(1);
+ switch (EB) {
+ case fp::ExceptionBehavior::ebIgnore:
+ // The only reason why ebIgnore nodes still need to be chained is that
+ // they might depend on the current rounding mode, and therefore must
+ // not be moved across instructions that may change that mode.
+ LLVM_FALLTHROUGH;
+ case fp::ExceptionBehavior::ebMayTrap:
+ // These must not be moved across calls or instructions that may change
+ // floating-point exception masks.
+ PendingConstrainedFP.push_back(OutChain);
+ break;
+ case fp::ExceptionBehavior::ebStrict:
+ // These must not be moved across calls or instructions that may change
+ // floating-point exception masks or read floating-point exception flags.
+ // In addition, they cannot be optimized out even if unused.
+ PendingConstrainedFPStrict.push_back(OutChain);
+ break;
+ }
+ };
+
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+ fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue();
+
+ SDNodeFlags Flags;
+ if (EB == fp::ExceptionBehavior::ebIgnore)
+ Flags.setNoFPExcept(true);
+
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&FPI))
+ Flags.copyFMF(*FPOp);
+
unsigned Opcode;
switch (FPI.getIntrinsicID()) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
-#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case Intrinsic::INTRINSIC: \
Opcode = ISD::STRICT_##DAGN; \
break;
#include "llvm/IR/ConstrainedOps.def"
+ case Intrinsic::experimental_constrained_fmuladd: {
+ Opcode = ISD::STRICT_FMA;
+ // Break fmuladd into fmul and fadd.
+ if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict ||
+ !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(),
+ ValueVTs[0])) {
+ Opers.pop_back();
+ SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags);
+ pushOutChain(Mul, EB);
+ Opcode = ISD::STRICT_FADD;
+ Opers.clear();
+ Opers.push_back(Mul.getValue(1));
+ Opers.push_back(Mul.getValue(0));
+ Opers.push_back(getValue(FPI.getArgOperand(2)));
+ }
+ break;
+ }
}
// A few strict DAG nodes carry additional operands that are not
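The constrained fmuladd fallback above emits a strict multiply whose result and out-chain feed a strict add whenever fusion is disallowed or FMA is not profitable. That split is numerically observable, which is why it is gated on the fusion mode; a small demonstration (compile with FP contraction disabled, e.g. -ffp-contract=off, so the second expression really is a multiply followed by an add):

    #include <cmath>
    #include <cstdio>

    int main() {
      // (1 + 2^-27) * (1 - 2^-27) = 1 - 2^-54 exactly; the fused form keeps
      // that tiny term, the separate multiply rounds it away before the add.
      double a = 1.0 + 0x1p-27, b = 1.0 - 0x1p-27, c = -1.0;
      std::printf("fused    : %a\n", std::fma(a, b, c)); // -0x1p-54
      std::printf("mul, add : %a\n", a * b + c);         // 0x0p+0
    }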
@@ -7058,32 +7061,8 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
}
}
- SDVTList VTs = DAG.getVTList(ValueVTs);
- SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers);
-
- assert(Result.getNode()->getNumValues() == 2);
-
- // Push node to the appropriate list so that future instructions can be
- // chained up correctly.
- SDValue OutChain = Result.getValue(1);
- switch (FPI.getExceptionBehavior().getValue()) {
- case fp::ExceptionBehavior::ebIgnore:
- // The only reason why ebIgnore nodes still need to be chained is that
- // they might depend on the current rounding mode, and therefore must
- // not be moved across instruction that may change that mode.
- LLVM_FALLTHROUGH;
- case fp::ExceptionBehavior::ebMayTrap:
- // These must not be moved across calls or instructions that may change
- // floating-point exception masks.
- PendingConstrainedFP.push_back(OutChain);
- break;
- case fp::ExceptionBehavior::ebStrict:
- // These must not be moved across calls or instructions that may change
- // floating-point exception masks or read floating-point exception flags.
- // In addition, they cannot be optimized out even if unused.
- PendingConstrainedFPStrict.push_back(OutChain);
- break;
- }
+ SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers, Flags);
+ pushOutChain(Result, EB);
SDValue FPResult = Result.getValue(0);
setValue(&FPI, FPResult);
@@ -7150,10 +7129,9 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
// There is a platform (e.g. wasm) that uses funclet style IR but does not
// actually use outlined funclets and their LSDA info style.
if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) {
- assert(CLI.CS);
+ assert(CLI.CB);
WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
- EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()),
- BeginLabel, EndLabel);
+ EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CB), BeginLabel, EndLabel);
} else if (!isScopedEHPersonality(Pers)) {
MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
}
@@ -7162,15 +7140,15 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
return Result;
}
-void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
+void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
bool isTailCall,
const BasicBlock *EHPadBB) {
auto &DL = DAG.getDataLayout();
- FunctionType *FTy = CS.getFunctionType();
- Type *RetTy = CS.getType();
+ FunctionType *FTy = CB.getFunctionType();
+ Type *RetTy = CB.getType();
TargetLowering::ArgListTy Args;
- Args.reserve(CS.arg_size());
+ Args.reserve(CB.arg_size());
const Value *SwiftErrorVal = nullptr;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -7178,7 +7156,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
if (isTailCall) {
// Avoid emitting tail calls in functions with the disable-tail-calls
// attribute.
- auto *Caller = CS.getInstruction()->getParent()->getParent();
+ auto *Caller = CB.getParent()->getParent();
if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() ==
"true")
isTailCall = false;
@@ -7191,10 +7169,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
isTailCall = false;
}
- for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
- i != e; ++i) {
+ for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
TargetLowering::ArgListEntry Entry;
- const Value *V = *i;
+ const Value *V = *I;
// Skip empty types
if (V->getType()->isEmptyTy())
@@ -7203,16 +7180,16 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SDValue ArgNode = getValue(V);
Entry.Node = ArgNode; Entry.Ty = V->getType();
- Entry.setAttributes(&CS, i - CS.arg_begin());
+ Entry.setAttributes(&CB, I - CB.arg_begin());
// Use swifterror virtual register as input to the call.
if (Entry.IsSwiftError && TLI.supportSwiftError()) {
SwiftErrorVal = V;
// We find the virtual register for the actual swifterror argument.
// Instead of using the Value, we use the virtual register instead.
- Entry.Node = DAG.getRegister(
- SwiftError.getOrCreateVRegUseAt(CS.getInstruction(), FuncInfo.MBB, V),
- EVT(TLI.getPointerTy(DL)));
+ Entry.Node =
+ DAG.getRegister(SwiftError.getOrCreateVRegUseAt(&CB, FuncInfo.MBB, V),
+ EVT(TLI.getPointerTy(DL)));
}
Args.push_back(Entry);
@@ -7225,7 +7202,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// If call site has a cfguardtarget operand bundle, create and add an
// additional ArgListEntry.
- if (auto Bundle = CS.getOperandBundle(LLVMContext::OB_cfguardtarget)) {
+ if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_cfguardtarget)) {
TargetLowering::ArgListEntry Entry;
Value *V = Bundle->Inputs[0];
SDValue ArgNode = getValue(V);
@@ -7237,7 +7214,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// Check if target-independent constraints permit a tail call here.
// Target-dependent constraints are checked within TLI->LowerCallTo.
- if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget()))
+ if (isTailCall && !isInTailCallPosition(CB, DAG.getTarget()))
isTailCall = false;
// Disable tail calls if there is an swifterror argument. Targets have not
@@ -7248,15 +7225,16 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
- .setCallee(RetTy, FTy, Callee, std::move(Args), CS)
+ .setCallee(RetTy, FTy, Callee, std::move(Args), CB)
.setTailCall(isTailCall)
- .setConvergent(CS.isConvergent());
+ .setConvergent(CB.isConvergent())
+ .setIsPreallocated(
+ CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
if (Result.first.getNode()) {
- const Instruction *Inst = CS.getInstruction();
- Result.first = lowerRangeToAssertZExt(DAG, *Inst, Result.first);
- setValue(Inst, Result.first);
+ Result.first = lowerRangeToAssertZExt(DAG, CB, Result.first);
+ setValue(&CB, Result.first);
}
// The last element of CLI.InVals has the SDValue for swifterror return.
@@ -7265,8 +7243,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
if (SwiftErrorVal && TLI.supportSwiftError()) {
// Get the last element of InVals.
SDValue Src = CLI.InVals.back();
- Register VReg = SwiftError.getOrCreateVRegDefAt(
- CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal);
+ Register VReg =
+ SwiftError.getOrCreateVRegDefAt(&CB, FuncInfo.MBB, SwiftErrorVal);
SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
DAG.setRoot(CopyNode);
}
@@ -7281,7 +7259,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
Type *LoadTy =
Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits());
if (LoadVT.isVector())
- LoadTy = VectorType::get(LoadTy, LoadVT.getVectorNumElements());
+ LoadTy = FixedVectorType::get(LoadTy, LoadVT.getVectorNumElements());
LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
PointerType::getUnqual(LoadTy));
@@ -7455,11 +7433,10 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
SDValue Src = getValue(I.getArgOperand(1));
SDValue Size = getValue(I.getArgOperand(2));
- unsigned DstAlign = DAG.InferPtrAlignment(Dst);
- unsigned SrcAlign = DAG.InferPtrAlignment(Src);
- unsigned Align = std::min(DstAlign, SrcAlign);
- if (Align == 0) // Alignment of one or both could not be inferred.
- Align = 1; // 0 and 1 both specify no alignment, but 0 is reserved.
+ Align DstAlign = DAG.InferPtrAlign(Dst).valueOrOne();
+ Align SrcAlign = DAG.InferPtrAlign(Src).valueOrOne();
+ // DAG::getMemcpy needs Alignment to be defined.
+ Align Alignment = std::min(DstAlign, SrcAlign);
bool isVol = false;
SDLoc sdl = getCurSDLoc();
@@ -7468,8 +7445,8 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
// because the return pointer needs to be adjusted by the size of
// the copied memory.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Align, isVol,
- false, /*isTailCall=*/false,
+ SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false,
+ /*isTailCall=*/false,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
assert(MC.getNode() != nullptr &&
@@ -7611,8 +7588,8 @@ bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Handle inline assembly differently.
- if (isa<InlineAsm>(I.getCalledValue())) {
- visitInlineAsm(&I);
+ if (I.isInlineAsm()) {
+ visitInlineAsm(I);
return;
}
@@ -7778,12 +7755,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
// CFGuardTarget bundles are lowered in LowerCallTo.
- assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt,
- LLVMContext::OB_funclet,
- LLVMContext::OB_cfguardtarget}) &&
+ assert(!I.hasOperandBundlesOtherThan(
+ {LLVMContext::OB_deopt, LLVMContext::OB_funclet,
+ LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated}) &&
"Cannot lower calls with arbitrary operand bundles!");
- SDValue Callee = getValue(I.getCalledValue());
+ SDValue Callee = getValue(I.getCalledOperand());
if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
@@ -7791,7 +7768,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Check if we can potentially perform a tail call. More detailed checking
// is done within LowerCallTo, after more information about the call is
// known.
- LowerCallTo(&I, Callee, I.isTailCall());
+ LowerCallTo(I, Callee, I.isTailCall());
}
namespace {
@@ -7834,7 +7811,7 @@ public:
if (!CallOperandVal) return MVT::Other;
if (isa<BasicBlock>(CallOperandVal))
- return TLI.getPointerTy(DL);
+ return TLI.getProgramPointerTy(DL);
llvm::Type *OpTy = CallOperandVal->getType();
@@ -7874,7 +7851,6 @@ public:
}
};
-using SDISelAsmOperandInfoVector = SmallVector<SDISelAsmOperandInfo, 16>;
} // end anonymous namespace
@@ -7936,9 +7912,9 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
Type *Ty = OpVal->getType();
auto &DL = DAG.getDataLayout();
uint64_t TySize = DL.getTypeAllocSize(Ty);
- unsigned Align = DL.getPrefTypeAlignment(Ty);
MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
+ int SSFI = MF.getFrameInfo().CreateStackObject(
+ TySize, DL.getPrefTypeAlign(Ty), false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot,
MachinePointerInfo::getFixedStack(MF, SSFI),
@@ -8083,13 +8059,13 @@ class ExtraFlags {
unsigned Flags = 0;
public:
- explicit ExtraFlags(ImmutableCallSite CS) {
- const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+ explicit ExtraFlags(const CallBase &Call) {
+ const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
if (IA->hasSideEffects())
Flags |= InlineAsm::Extra_HasSideEffects;
if (IA->isAlignStack())
Flags |= InlineAsm::Extra_IsAlignStack;
- if (CS.isConvergent())
+ if (Call.isConvergent())
Flags |= InlineAsm::Extra_IsConvergent;
Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
}
@@ -8116,23 +8092,24 @@ public:
} // end anonymous namespace
/// visitInlineAsm - Handle a call to an InlineAsm object.
-void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
- const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call) {
+ const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
/// ConstraintOperands - Information about all of the constraints.
- SDISelAsmOperandInfoVector ConstraintOperands;
+ SmallVector<SDISelAsmOperandInfo, 16> ConstraintOperands;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
- DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS);
+ DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), Call);
// First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack,
// AsmDialect, MayLoad, MayStore).
bool HasSideEffect = IA->hasSideEffects();
- ExtraFlags ExtraInfo(CS);
+ ExtraFlags ExtraInfo(Call);
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
unsigned ResNo = 0; // ResNo - The result number of the next output.
+ unsigned NumMatchingOps = 0;
for (auto &T : TargetConstraints) {
ConstraintOperands.push_back(SDISelAsmOperandInfo(T));
SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
@@ -8140,14 +8117,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Compute the value type for each operand.
if (OpInfo.Type == InlineAsm::isInput ||
(OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) {
- OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
// Process the call argument. BasicBlocks are labels, currently appearing
// only in asm's.
- const Instruction *I = CS.getInstruction();
- if (isa<CallBrInst>(I) &&
- (ArgNo - 1) >= (cast<CallBrInst>(I)->getNumArgOperands() -
- cast<CallBrInst>(I)->getNumIndirectDests())) {
+ if (isa<CallBrInst>(Call) &&
+ ArgNo - 1 >= (cast<CallBrInst>(&Call)->getNumArgOperands() -
+ cast<CallBrInst>(&Call)->getNumIndirectDests() -
+ NumMatchingOps) &&
+ (NumMatchingOps == 0 ||
+ ArgNo - 1 < (cast<CallBrInst>(&Call)->getNumArgOperands() -
+ NumMatchingOps))) {
const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal);
EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true);
OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT);
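The callbr check above decides whether a call argument is really one of the indirect destination labels. With NumMatchingOps tied operands appended for matching constraints, the check assumes the label block addresses occupy the slots just before those duplicates at the end of the argument list; a hypothetical restatement of the same window test:

    // ArgIdx is the 0-based argument index (ArgNo - 1 in the code above).
    bool isIndirectDestLabel(unsigned ArgIdx, unsigned NumArgOperands,
                             unsigned NumIndirectDests, unsigned NumMatchingOps) {
      unsigned FirstLabel = NumArgOperands - NumIndirectDests - NumMatchingOps;
      unsigned FirstMatchingDup = NumArgOperands - NumMatchingOps;
      return ArgIdx >= FirstLabel &&
             (NumMatchingOps == 0 || ArgIdx < FirstMatchingDup);
    }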
@@ -8164,20 +8144,23 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
} else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
// The return value of the call is this value. As such, there is no
// corresponding argument.
- assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
- if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
+ if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
OpInfo.ConstraintVT = TLI.getSimpleValueType(
DAG.getDataLayout(), STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
OpInfo.ConstraintVT =
- TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType());
+ TLI.getSimpleValueType(DAG.getDataLayout(), Call.getType());
}
++ResNo;
} else {
OpInfo.ConstraintVT = MVT::Other;
}
+ if (OpInfo.hasMatchingInput())
+ ++NumMatchingOps;
+
if (!HasSideEffect)
HasSideEffect = OpInfo.hasMemory(TLI);
@@ -8191,9 +8174,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand))
// We've delayed emitting a diagnostic like the "n" constraint because
// inlining could cause an integer showing up.
- return emitInlineAsmError(
- CS, "constraint '" + Twine(T.ConstraintCode) + "' expects an "
- "integer constant expression");
+ return emitInlineAsmError(Call, "constraint '" + Twine(T.ConstraintCode) +
+ "' expects an integer constant "
+ "expression");
ExtraInfo.update(T);
}
@@ -8203,7 +8186,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// memory and is nonvolatile.
SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
- bool IsCallBr = isa<CallBrInst>(CS.getInstruction());
+ bool IsCallBr = isa<CallBrInst>(Call);
if (IsCallBr) {
// If this is a callbr we need to flush pending exports since inlineasm_br
// is a terminator. We need to do this before nodes are glued to
@@ -8253,12 +8236,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
std::vector<SDValue> AsmNodeOperands;
AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
AsmNodeOperands.push_back(DAG.getTargetExternalSymbol(
- IA->getAsmString().c_str(), TLI.getPointerTy(DAG.getDataLayout())));
+ IA->getAsmString().c_str(), TLI.getProgramPointerTy(DAG.getDataLayout())));
// If we have a !srcloc metadata node associated with it, we want to attach
// this to the ultimately generated inline asm machineinstr. To do this, we
// pass in the third operand as this (potentially null) inline asm MDNode.
- const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
+ const MDNode *SrcLoc = Call.getMetadata("srcloc");
AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
// Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
@@ -8276,6 +8259,21 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
: OpInfo;
GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
+ auto DetectWriteToReservedRegister = [&]() {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ for (unsigned Reg : OpInfo.AssignedRegs.Regs) {
+ if (Register::isPhysicalRegister(Reg) &&
+ TRI.isInlineAsmReadOnlyReg(MF, Reg)) {
+ const char *RegName = TRI.getName(Reg);
+ emitInlineAsmError(Call, "write to reserved register '" +
+ Twine(RegName) + "'");
+ return true;
+ }
+ }
+ return false;
+ };
+
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
@@ -8296,11 +8294,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// C_Immediate/C_Other). Find a register that we can use.
if (OpInfo.AssignedRegs.Regs.empty()) {
emitInlineAsmError(
- CS, "couldn't allocate output register for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'");
+ Call, "couldn't allocate output register for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
return;
}
+ if (DetectWriteToReservedRegister())
+ return;
+
// Add information to the INLINEASM node to know that this register is
// set.
OpInfo.AssignedRegs.AddInlineAsmOperands(
@@ -8325,9 +8326,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
if (OpInfo.isIndirect) {
// This happens on gcc/testsuite/gcc.dg/pr8788-1.c
- emitInlineAsmError(CS, "inline asm not supported yet:"
- " don't know how to handle tied "
- "indirect register inputs");
+ emitInlineAsmError(Call, "inline asm not supported yet: "
+ "don't know how to handle tied "
+ "indirect register inputs");
return;
}
@@ -8341,8 +8342,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(RegInfo.createVirtualRegister(RC));
} else {
- emitInlineAsmError(CS, "inline asm error: This value type register "
- "class is not natively supported!");
+ emitInlineAsmError(Call,
+ "inline asm error: This value type register "
+ "class is not natively supported!");
return;
}
@@ -8350,8 +8352,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDLoc dl = getCurSDLoc();
// Use the produced MatchedRegs object to
- MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
- CS.getInstruction());
+ MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, &Call);
MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
true, OpInfo.getMatchedOperand(), dl,
DAG, AsmNodeOperands);
@@ -8385,13 +8386,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
if (Ops.empty()) {
if (OpInfo.ConstraintType == TargetLowering::C_Immediate)
if (isa<ConstantSDNode>(InOperandVal)) {
- emitInlineAsmError(CS, "value out of range for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'");
+ emitInlineAsmError(Call, "value out of range for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
return;
}
- emitInlineAsmError(CS, "invalid operand for inline asm constraint '" +
- Twine(OpInfo.ConstraintCode) + "'");
+ emitInlineAsmError(Call,
+ "invalid operand for inline asm constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
return;
}
@@ -8432,23 +8434,27 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// TODO: Support this.
if (OpInfo.isIndirect) {
emitInlineAsmError(
- CS, "Don't know how to handle indirect register inputs yet "
- "for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'");
+ Call, "Don't know how to handle indirect register inputs yet "
+ "for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
return;
}
// Copy the input into the appropriate registers.
if (OpInfo.AssignedRegs.Regs.empty()) {
- emitInlineAsmError(CS, "couldn't allocate input reg for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'");
+ emitInlineAsmError(Call,
+ "couldn't allocate input reg for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
return;
}
+ if (DetectWriteToReservedRegister())
+ return;
+
SDLoc dl = getCurSDLoc();
- OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl,
- Chain, &Flag, CS.getInstruction());
+ OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
+ &Call);
OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
dl, DAG, AsmNodeOperands);
@@ -8480,12 +8486,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SmallVector<SDValue, 1> ResultValues;
SmallVector<SDValue, 8> OutChains;
- llvm::Type *CSResultType = CS.getType();
+ llvm::Type *CallResultType = Call.getType();
ArrayRef<Type *> ResultTypes;
- if (StructType *StructResult = dyn_cast<StructType>(CSResultType))
+ if (StructType *StructResult = dyn_cast<StructType>(CallResultType))
ResultTypes = StructResult->elements();
- else if (!CSResultType->isVoidTy())
- ResultTypes = makeArrayRef(CSResultType);
+ else if (!CallResultType->isVoidTy())
+ ResultTypes = makeArrayRef(CallResultType);
auto CurResultType = ResultTypes.begin();
auto handleRegAssign = [&](SDValue V) {
@@ -8529,8 +8535,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
switch (OpInfo.ConstraintType) {
case TargetLowering::C_Register:
case TargetLowering::C_RegisterClass:
- Val = OpInfo.AssignedRegs.getCopyFromRegs(
- DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction());
+ Val = OpInfo.AssignedRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
+ Chain, &Flag, &Call);
break;
case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
@@ -8552,7 +8558,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OutChains.push_back(Store);
} else {
// generate CopyFromRegs to associated registers.
- assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+ assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
if (Val.getOpcode() == ISD::MERGE_VALUES) {
for (const SDValue &V : Val->op_values())
handleRegAssign(V);
@@ -8571,7 +8577,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(ResultVTs), ResultValues);
- setValue(CS.getInstruction(), V);
+ setValue(&Call, V);
}
// Collect store chains.
@@ -8583,15 +8589,15 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
DAG.setRoot(Chain);
}
-void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS,
+void SelectionDAGBuilder::emitInlineAsmError(const CallBase &Call,
const Twine &Message) {
LLVMContext &Ctx = *DAG.getContext();
- Ctx.emitError(CS.getInstruction(), Message);
+ Ctx.emitError(&Call, Message);
// Make sure we leave the DAG in a valid state
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 1> ValueVTs;
- ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs);
+ ComputeValueVTs(TLI, DAG.getDataLayout(), Call.getType(), ValueVTs);
if (ValueVTs.empty())
return;
@@ -8600,7 +8606,7 @@ void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS,
for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i)
Ops.push_back(DAG.getUNDEF(ValueVTs[i]));
- setValue(CS.getInstruction(), DAG.getMergeValues(Ops, getCurSDLoc()));
+ setValue(&Call, DAG.getMergeValues(Ops, getCurSDLoc()));
}
void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
@@ -8616,7 +8622,7 @@ void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
SDValue V = DAG.getVAArg(
TLI.getMemValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(),
getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)),
- DL.getABITypeAlignment(I.getType()));
+ DL.getABITypeAlign(I.getType()).value());
DAG.setRoot(V.getValue(1));
if (I.getType()->isPointerTy())
@@ -8711,7 +8717,9 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
.setChain(getRoot())
.setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
.setDiscardResult(Call->use_empty())
- .setIsPatchPoint(IsPatchPoint);
+ .setIsPatchPoint(IsPatchPoint)
+ .setIsPreallocated(
+ Call->countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
}
/// Add a stack map intrinsic call's live variable operands to a stackmap
@@ -8731,11 +8739,11 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
/// only available in a register, then the runtime would need to trap when
/// execution reaches the StackMap in order to read the alloca's location.
-static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
+static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx,
const SDLoc &DL, SmallVectorImpl<SDValue> &Ops,
SelectionDAGBuilder &Builder) {
- for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) {
- SDValue OpVal = Builder.getValue(CS.getArgument(i));
+ for (unsigned i = StartIdx, e = Call.arg_size(); i != e; ++i) {
+ SDValue OpVal = Builder.getValue(Call.getArgOperand(i));
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
Ops.push_back(
Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
@@ -8761,7 +8769,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
SmallVector<SDValue, 32> Ops;
SDLoc DL = getCurSDLoc();
- Callee = getValue(CI.getCalledValue());
+ Callee = getValue(CI.getCalledOperand());
NullPtr = DAG.getIntPtrConstant(0, DL, true);
// The stackmap intrinsic only records the live variables (the arguments
@@ -8787,7 +8795,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
MVT::i32));
// Push live variables for the stack map.
- addStackMapLiveVars(&CI, 2, DL, Ops, *this);
+ addStackMapLiveVars(CI, 2, DL, Ops, *this);
// We are not pushing any register mask info here on the operands list,
// because the stackmap doesn't clobber anything.
@@ -8814,7 +8822,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
}
/// Lower llvm.experimental.patchpoint directly to its target opcode.
-void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
+void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB,
const BasicBlock *EHPadBB) {
// void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
// i32 <numBytes>,
@@ -8823,11 +8831,11 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
// [Args...],
// [live variables...])
- CallingConv::ID CC = CS.getCallingConv();
+ CallingConv::ID CC = CB.getCallingConv();
bool IsAnyRegCC = CC == CallingConv::AnyReg;
- bool HasDef = !CS->getType()->isVoidTy();
+ bool HasDef = !CB.getType()->isVoidTy();
SDLoc dl = getCurSDLoc();
- SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos));
+ SDValue Callee = getValue(CB.getArgOperand(PatchPointOpers::TargetPos));
// Handle immediate and symbolic callees.
if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee))
@@ -8839,23 +8847,23 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
SymbolicCallee->getValueType(0));
// Get the real number of arguments participating in the call <numArgs>
- SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos));
+ SDValue NArgVal = getValue(CB.getArgOperand(PatchPointOpers::NArgPos));
unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();
// Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
// Intrinsics include all meta-operands up to but not including CC.
unsigned NumMetaOpers = PatchPointOpers::CCPos;
- assert(CS.arg_size() >= NumMetaOpers + NumArgs &&
+ assert(CB.arg_size() >= NumMetaOpers + NumArgs &&
"Not enough arguments provided to the patchpoint intrinsic");
// For AnyRegCC the arguments are lowered later on manually.
unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
Type *ReturnTy =
- IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
+ IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CB.getType();
TargetLowering::CallLoweringInfo CLI(DAG);
- populateCallLoweringInfo(CLI, cast<CallBase>(CS.getInstruction()),
- NumMetaOpers, NumCallArgs, Callee, ReturnTy, true);
+ populateCallLoweringInfo(CLI, &CB, NumMetaOpers, NumCallArgs, Callee,
+ ReturnTy, true);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
SDNode *CallEnd = Result.second.getNode();
@@ -8873,10 +8881,10 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
SmallVector<SDValue, 8> Ops;
// Add the <id> and <numBytes> constants.
- SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos));
+ SDValue IDVal = getValue(CB.getArgOperand(PatchPointOpers::IDPos));
Ops.push_back(DAG.getTargetConstant(
cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64));
- SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos));
+ SDValue NBytesVal = getValue(CB.getArgOperand(PatchPointOpers::NBytesPos));
Ops.push_back(DAG.getTargetConstant(
cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl,
MVT::i32));
@@ -8898,14 +8906,14 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
// place these in any free register.
if (IsAnyRegCC)
for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
- Ops.push_back(getValue(CS.getArgument(i)));
+ Ops.push_back(getValue(CB.getArgOperand(i)));
// Push the arguments from the call instruction up to the register mask.
SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1;
Ops.append(Call->op_begin() + 2, e);
// Push live variables for the stack map.
- addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this);
+ addStackMapLiveVars(CB, NumMetaOpers + NumArgs, dl, Ops, *this);
// Push the register mask info.
if (HasGlue)
@@ -8926,7 +8934,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
// Create the return types based on the intrinsic definition
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 3> ValueVTs;
- ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs);
+ ComputeValueVTs(TLI, DAG.getDataLayout(), CB.getType(), ValueVTs);
assert(ValueVTs.size() == 1 && "Expected only one return value type.");
// There is always a chain and a glue type at the end
@@ -8943,9 +8951,9 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
// Update the NodeMap.
if (HasDef) {
if (IsAnyRegCC)
- setValue(CS.getInstruction(), SDValue(MN, 0));
+ setValue(&CB, SDValue(MN, 0));
else
- setValue(CS.getInstruction(), Result.first);
+ setValue(&CB, Result.first);
}
// Fixup the consumers of the intrinsic. The chain and glue may be used in the
@@ -9094,9 +9102,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// assert(!CS.hasInAllocaArgument() &&
// "sret demotion is incompatible with inalloca");
uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy);
- unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy);
+ Align Alignment = DL.getPrefTypeAlign(CLI.RetTy);
MachineFunction &MF = CLI.DAG.getMachineFunction();
- DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
+ DemoteStackIdx =
+ MF.getFrameInfo().CreateStackObject(TySize, Alignment, false);
Type *StackSlotPtrType = PointerType::get(CLI.RetTy,
DL.getAllocaAddrSpace());
@@ -9114,7 +9123,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Entry.IsSwiftSelf = false;
Entry.IsSwiftError = false;
Entry.IsCFGuardTarget = false;
- Entry.Alignment = Align;
+ Entry.Alignment = Alignment;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
CLI.NumFixedArgs += 1;
CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
@@ -9230,6 +9239,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setCFGuardTarget();
if (Args[i].IsByVal)
Flags.setByVal();
+ if (Args[i].IsPreallocated) {
+ Flags.setPreallocated();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // preallocated. This way we can know how many bytes we should've
+ // allocated and how many bytes a callee cleanup function will pop. If
+ // we port preallocated to more targets, we'll have to add custom
+ // preallocated handling in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
if (Args[i].IsInAlloca) {
Flags.setInAlloca();
// Set the byval flag for CCAssignFn callbacks that don't know about
@@ -9239,7 +9257,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// in the various CC lowering callbacks.
Flags.setByVal();
}
- if (Args[i].IsByVal || Args[i].IsInAlloca) {
+ if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) {
PointerType *Ty = cast<PointerType>(Args[i].Ty);
Type *ElementTy = Ty->getElementType();
@@ -9248,12 +9266,12 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setByValSize(FrameSize);
// info is not there but there are cases it cannot get right.
- unsigned FrameAlign;
- if (Args[i].Alignment)
- FrameAlign = Args[i].Alignment;
+ Align FrameAlign;
+ if (auto MA = Args[i].Alignment)
+ FrameAlign = *MA;
else
- FrameAlign = getByValTypeAlignment(ElementTy, DL);
- Flags.setByValAlign(Align(FrameAlign));
+ FrameAlign = Align(getByValTypeAlignment(ElementTy, DL));
+ Flags.setByValAlign(FrameAlign);
}
if (Args[i].IsNest)
Flags.setNest();
@@ -9298,8 +9316,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setReturned();
}
- getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
- CLI.CS.getInstruction(), CLI.CallConv, ExtendKind);
+ getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, CLI.CB,
+ CLI.CallConv, ExtendKind);
for (unsigned j = 0; j != NumParts; ++j) {
// if it isn't first piece, alignment must be 1
@@ -9311,7 +9329,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0) {
- MyFlags.Flags.setOrigAlign(Align::None());
+ MyFlags.Flags.setOrigAlign(Align(1));
if (j == NumParts - 1)
MyFlags.Flags.setSplitEnd();
}
@@ -9376,6 +9394,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
+ MachineFunction &MF = CLI.DAG.getMachineFunction();
+ Align HiddenSRetAlign = MF.getFrameInfo().getObjectAlign(DemoteStackIdx);
for (unsigned i = 0; i < NumValues; ++i) {
SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
CLI.DAG.getConstant(Offsets[i], CLI.DL,
@@ -9384,7 +9404,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
RetTys[i], CLI.DL, CLI.Chain, Add,
MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
DemoteStackIdx, Offsets[i]),
- /* Alignment = */ 1);
+ HiddenSRetAlign);
ReturnValues[i] = L;
Chains[i] = L.getValue(1);
}
@@ -9551,7 +9571,7 @@ findArgumentCopyElisionCandidates(const DataLayout &DL,
// initializes the alloca. Don't elide copies from the same argument twice.
const Value *Val = SI->getValueOperand()->stripPointerCasts();
const auto *Arg = dyn_cast<Argument>(Val);
- if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() ||
+ if (!Arg || Arg->hasPassPointeeByValueAttr() ||
Arg->getType()->isEmptyTy() ||
DL.getTypeStoreSize(Arg->getType()) !=
DL.getTypeAllocSize(AI->getAllocatedType()) ||
@@ -9607,16 +9627,12 @@ static void tryToElideArgumentCopy(
"object size\n");
return;
}
- unsigned RequiredAlignment = AI->getAlignment();
- if (!RequiredAlignment) {
- RequiredAlignment = FuncInfo.MF->getDataLayout().getABITypeAlignment(
- AI->getAllocatedType());
- }
- if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) {
+ Align RequiredAlignment = AI->getAlign();
+ if (MFI.getObjectAlign(FixedIndex) < RequiredAlignment) {
LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
"greater than stack argument alignment ("
- << RequiredAlignment << " vs "
- << MFI.getObjectAlignment(FixedIndex) << ")\n");
+ << DebugStr(RequiredAlignment) << " vs "
+ << DebugStr(MFI.getObjectAlign(FixedIndex)) << ")\n");
return;
}
@@ -9653,6 +9669,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
const DataLayout &DL = DAG.getDataLayout();
SmallVector<ISD::InputArg, 16> Ins;
+ // In Naked functions we aren't going to save any registers.
+ if (F.hasFnAttribute(Attribute::Naked))
+ return;
+
if (!FuncInfo->CanLowerReturn) {
// Put in an sret pointer parameter before all the other parameters.
SmallVector<EVT, 1> ValueVTs;
@@ -9741,12 +9761,21 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// in the various CC lowering callbacks.
Flags.setByVal();
}
+ if (Arg.hasAttribute(Attribute::Preallocated)) {
+ Flags.setPreallocated();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // preallocated. This way we can know how many bytes we should've
+ // allocated and how many bytes a callee cleanup function will pop. If
+ // we port preallocated to more targets, we'll have to add custom
+ // preallocated handling in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
if (F.getCallingConv() == CallingConv::X86_INTR) {
// IA Interrupt passes frame (1st parameter) by value in the stack.
if (ArgNo == 0)
Flags.setByVal();
}
- if (Flags.isByVal() || Flags.isInAlloca()) {
+ if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
Type *ElementTy = Arg.getParamByValType();
// For ByVal, size and alignment should be passed from FE. BE will
@@ -9786,7 +9815,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MyFlags.Flags.setSplit();
// if it isn't first piece, alignment must be 1
else if (i > 0) {
- MyFlags.Flags.setOrigAlign(Align::None());
+ MyFlags.Flags.setOrigAlign(Align(1));
if (i == NumRegs - 1)
MyFlags.Flags.setSplitEnd();
}
@@ -9988,7 +10017,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
}
// Finally, if the target has anything special to do, allow it to do so.
- EmitFunctionEntryCode();
+ emitFunctionEntryCode();
}
/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
@@ -10040,7 +10069,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
}
Reg = RegOut;
} else {
- DenseMap<const Value *, unsigned>::iterator I =
+ DenseMap<const Value *, Register>::iterator I =
FuncInfo.ValueMap.find(PHIOp);
if (I != FuncInfo.ValueMap.end())
Reg = I->second;
@@ -10654,6 +10683,19 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
}
void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
- SDValue N = getValue(I.getOperand(0));
- setValue(&I, N);
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
+ ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+
+ SmallVector<SDValue, 4> Values(NumValues);
+ SDValue Op = getValue(I.getOperand(0));
+
+ for (unsigned i = 0; i != NumValues; ++i)
+ Values[i] = DAG.getNode(ISD::FREEZE, getCurSDLoc(), ValueVTs[i],
+ SDValue(Op.getNode(), Op.getResNo() + i));
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ DAG.getVTList(ValueVTs), Values));
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 18e0edf7fc047..f0b7fb0d52299 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -14,19 +14,16 @@
#define LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
#include "StatepointLowering.h"
-#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Statepoint.h"
@@ -55,7 +52,6 @@ class CatchSwitchInst;
class CleanupPadInst;
class CleanupReturnInst;
class Constant;
-class ConstantInt;
class ConstrainedFPIntrinsic;
class DbgValueInst;
class DataLayout;
@@ -77,6 +73,7 @@ class PHINode;
class ResumeInst;
class ReturnInst;
class SDDbgValue;
+class SelectionDAG;
class StoreInst;
class SwiftErrorValueTracking;
class SwitchInst;
@@ -409,6 +406,8 @@ public:
SelectionDAGBuilder *SDB;
};
+ // Data related to deferred switch lowerings. Used to construct additional
+ // Basic Blocks in SelectionDAGISel::FinishBasicBlock.
std::unique_ptr<SDAGSwitchLowering> SL;
/// A StackProtectorDescriptor structure used to communicate stack protector
@@ -518,7 +517,6 @@ public:
void resolveOrClearDbgInfo();
SDValue getValue(const Value *V);
- bool findValue(const Value *V) const;
/// Return the SDNode for the specified IR value if it exists.
SDNode *getNodeForIRValue(const Value *V) {
@@ -557,7 +555,7 @@ public:
bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
void CopyToExportRegsIfNeeded(const Value *V);
void ExportFromCurrentBlock(const Value *V);
- void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
+ void LowerCallTo(const CallBase &CB, SDValue Callee, bool IsTailCall,
const BasicBlock *EHPadBB = nullptr);
// Lower range metadata from 0 to N to assert zext to an integer of nearest
@@ -627,7 +625,7 @@ public:
// This function is responsible for the whole statepoint lowering process.
// It uniformly handles invoke and call statepoints.
- void LowerStatepoint(ImmutableStatepoint ISP,
+ void LowerStatepoint(const GCStatepointInst &I,
const BasicBlock *EHPadBB = nullptr);
void LowerCallSiteWithDeoptBundle(const CallBase *Call, SDValue Callee,
@@ -764,7 +762,7 @@ private:
void visitStoreToSwiftError(const StoreInst &I);
void visitFreeze(const FreezeInst &I);
- void visitInlineAsm(ImmutableCallSite CS);
+ void visitInlineAsm(const CallBase &Call);
void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
@@ -774,8 +772,7 @@ private:
void visitVAEnd(const CallInst &I);
void visitVACopy(const CallInst &I);
void visitStackmap(const CallInst &I);
- void visitPatchpoint(ImmutableCallSite CS,
- const BasicBlock *EHPadBB = nullptr);
+ void visitPatchpoint(const CallBase &CB, const BasicBlock *EHPadBB = nullptr);
// These two are implemented in StatepointLowering.cpp
void visitGCRelocate(const GCRelocateInst &Relocate);
@@ -795,7 +792,7 @@ private:
void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
- void emitInlineAsmError(ImmutableCallSite CS, const Twine &Message);
+ void emitInlineAsmError(const CallBase &Call, const Twine &Message);
/// If V is an function argument then create corresponding DBG_VALUE machine
/// instruction for it now. At the end of instruction selection, they will be
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 6fd71393bf38c..42e3016e65b83 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -65,7 +65,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
if (G)
if (const TargetInstrInfo *TII = G->getSubtarget().getInstrInfo())
if (getMachineOpcode() < TII->getNumOpcodes())
- return TII->getName(getMachineOpcode());
+ return std::string(TII->getName(getMachineOpcode()));
return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
}
if (G) {
@@ -106,6 +106,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::TokenFactor: return "TokenFactor";
case ISD::AssertSext: return "AssertSext";
case ISD::AssertZext: return "AssertZext";
+ case ISD::AssertAlign: return "AssertAlign";
case ISD::BasicBlock: return "BasicBlock";
case ISD::VALUETYPE: return "ValueType";
@@ -170,6 +171,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::CopyToReg: return "CopyToReg";
case ISD::CopyFromReg: return "CopyFromReg";
case ISD::UNDEF: return "undef";
+ case ISD::VSCALE: return "vscale";
case ISD::MERGE_VALUES: return "merge_values";
case ISD::INLINEASM: return "inlineasm";
case ISD::INLINEASM_BR: return "inlineasm_br";
@@ -210,6 +212,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STRICT_FNEARBYINT: return "strict_fnearbyint";
case ISD::FROUND: return "fround";
case ISD::STRICT_FROUND: return "strict_fround";
+ case ISD::FROUNDEVEN: return "froundeven";
+ case ISD::STRICT_FROUNDEVEN: return "strict_froundeven";
case ISD::FEXP: return "fexp";
case ISD::STRICT_FEXP: return "strict_fexp";
case ISD::FEXP2: return "fexp2";
@@ -313,7 +317,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UMULFIXSAT: return "umulfixsat";
case ISD::SDIVFIX: return "sdivfix";
+ case ISD::SDIVFIXSAT: return "sdivfixsat";
case ISD::UDIVFIX: return "udivfix";
+ case ISD::UDIVFIXSAT: return "udivfixsat";
// Conversion operators.
case ISD::SIGN_EXTEND: return "sign_extend";
@@ -341,7 +347,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::BITCAST: return "bitcast";
case ISD::ADDRSPACECAST: return "addrspacecast";
case ISD::FP16_TO_FP: return "fp16_to_fp";
+ case ISD::STRICT_FP16_TO_FP: return "strict_fp16_to_fp";
case ISD::FP_TO_FP16: return "fp_to_fp16";
+ case ISD::STRICT_FP_TO_FP16: return "strict_fp_to_fp16";
case ISD::LROUND: return "lround";
case ISD::STRICT_LROUND: return "strict_lround";
case ISD::LLROUND: return "llround";
@@ -387,6 +395,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::GC_TRANSITION_START: return "gc_transition.start";
case ISD::GC_TRANSITION_END: return "gc_transition.end";
case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset";
+ case ISD::FREEZE: return "freeze";
+ case ISD::PREALLOCATED_SETUP:
+ return "call_setup";
+ case ISD::PREALLOCATED_ARG:
+ return "call_alloc";
// Bit manipulation
case ISD::ABS: return "abs";
@@ -547,9 +560,6 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (getFlags().hasAllowReassociation())
OS << " reassoc";
- if (getFlags().hasVectorReduction())
- OS << " vector-reduction";
-
if (getFlags().hasNoFPExcept())
OS << " nofpexcept";
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 6c57c72d47a7f..1f0432196a2d7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -215,6 +215,7 @@ namespace llvm {
OptLevelChanger(SelectionDAGISel &ISel,
CodeGenOpt::Level NewOptLevel) : IS(ISel) {
SavedOptLevel = IS.OptLevel;
+ SavedFastISel = IS.TM.Options.EnableFastISel;
if (NewOptLevel == SavedOptLevel)
return;
IS.OptLevel = NewOptLevel;
@@ -223,7 +224,6 @@ namespace llvm {
<< IS.MF->getFunction().getName() << "\n");
LLVM_DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O"
<< NewOptLevel << "\n");
- SavedFastISel = IS.TM.Options.EnableFastISel;
if (NewOptLevel == CodeGenOpt::None) {
IS.TM.setFastISel(IS.TM.getO0WantsFastISel());
LLVM_DEBUG(
@@ -337,7 +337,8 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
if (UseMBPI && OptLevel != CodeGenOpt::None)
AU.addRequired<BranchProbabilityInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
- LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
+ if (OptLevel != CodeGenOpt::None)
+ LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -441,9 +442,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- auto *BFI = (PSI && PSI->hasProfileSummary()) ?
- &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
- nullptr;
+ BlockFrequencyInfo *BFI = nullptr;
+ if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOpt::None)
+ BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
@@ -513,15 +514,15 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// registers. If we don't apply the reg fixups before, some registers may
// appear as unused and will be skipped, resulting in bad MI.
MachineRegisterInfo &MRI = MF->getRegInfo();
- for (DenseMap<unsigned, unsigned>::iterator I = FuncInfo->RegFixups.begin(),
+ for (DenseMap<Register, Register>::iterator I = FuncInfo->RegFixups.begin(),
E = FuncInfo->RegFixups.end();
I != E; ++I) {
- unsigned From = I->first;
- unsigned To = I->second;
+ Register From = I->first;
+ Register To = I->second;
// If To is also scheduled to be replaced, find what its ultimate
// replacement is.
while (true) {
- DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To);
+ DenseMap<Register, Register>::iterator J = FuncInfo->RegFixups.find(To);
if (J == E)
break;
To = J->second;
@@ -622,7 +623,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Otherwise this is another use or second copy use.
CopyUseMI = nullptr; break;
}
- if (CopyUseMI) {
+ if (CopyUseMI &&
+ TRI.getRegSizeInBits(LDI->second, MRI) ==
+ TRI.getRegSizeInBits(CopyUseMI->getOperand(0).getReg(), MRI)) {
// Use MI's debug location, which describes where Variable was
// declared, rather than whatever is attached to CopyUseMI.
MachineInstr *NewMI =
@@ -658,36 +661,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Determine if floating point is used for msvc
computeUsesMSVCFloatingPoint(TM.getTargetTriple(), Fn, MF->getMMI());
- // Replace forward-declared registers with the registers containing
- // the desired value.
- for (DenseMap<unsigned, unsigned>::iterator
- I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end();
- I != E; ++I) {
- unsigned From = I->first;
- unsigned To = I->second;
- // If To is also scheduled to be replaced, find what its ultimate
- // replacement is.
- while (true) {
- DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To);
- if (J == E) break;
- To = J->second;
- }
- // Make sure the new register has a sufficiently constrained register class.
- if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To))
- MRI.constrainRegClass(To, MRI.getRegClass(From));
- // Replace it.
-
-
- // Replacing one register with another won't touch the kill flags.
- // We need to conservatively clear the kill flags as a kill on the old
- // register might dominate existing uses of the new register.
- if (!MRI.use_empty(To))
- MRI.clearKillFlags(From);
- MRI.replaceRegWith(From, To);
- }
-
- TLI->finalizeLowering(*MF);
-
// Release function-specific state. SDB and CurDAG are already cleared
// at this point.
FuncInfo->clear();
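
The register-fixup loop that the hunks above move earlier in runOnMachineFunction walks the RegFixups map until it reaches a register that is not itself scheduled for replacement. A minimal standalone sketch of that resolution step, using std::unordered_map in place of DenseMap<Register, Register> (all names here are illustrative, not LLVM API):

  #include <cassert>
  #include <unordered_map>

  // Follow the chain of scheduled replacements (From -> To -> ...) until we
  // reach a register that is not itself scheduled to be replaced.
  unsigned resolveUltimateReplacement(
      const std::unordered_map<unsigned, unsigned> &Fixups, unsigned Reg) {
    auto It = Fixups.find(Reg);
    while (It != Fixups.end()) {
      Reg = It->second;
      It = Fixups.find(Reg);
    }
    return Reg;
  }

  int main() {
    // %1 is replaced by %2, and %2 in turn by %3: users of %1 end up on %3.
    std::unordered_map<unsigned, unsigned> Fixups = {{1, 2}, {2, 3}};
    assert(resolveUltimateReplacement(Fixups, 1) == 3);
    assert(resolveUltimateReplacement(Fixups, 4) == 4); // not scheduled at all
    return 0;
  }
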
@@ -1321,8 +1294,11 @@ static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) {
assert(DI->getVariable() && "Missing variable");
assert(DI->getDebugLoc() && "Missing location");
const Value *Address = DI->getAddress();
- if (!Address)
+ if (!Address) {
+ LLVM_DEBUG(dbgs() << "processDbgDeclares skipping " << *DI
+ << " (bad address)\n");
continue;
+ }
// Look through casts and constant offset GEPs. These mostly come from
// inalloca.
@@ -1347,6 +1323,8 @@ static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) {
if (Offset.getBoolValue())
Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset,
Offset.getZExtValue());
+ LLVM_DEBUG(dbgs() << "processDbgDeclares: setVariableDbgInfo FI=" << FI
+ << ", " << *DI << "\n");
MF->setVariableDbgInfo(DI->getVariable(), Expr, FI, DI->getDebugLoc());
}
}
@@ -1513,8 +1491,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// to keep track of gc-relocates for a particular gc-statepoint. This is
// done by SelectionDAGBuilder::LowerAsSTATEPOINT, called before
// visitGCRelocate.
- if (isa<CallInst>(Inst) && !isStatepoint(Inst) && !isGCRelocate(Inst) &&
- !isGCResult(Inst)) {
+ if (isa<CallInst>(Inst) && !isa<GCStatepointInst>(Inst) &&
+ !isa<GCRelocateInst>(Inst) && !isa<GCResultInst>(Inst)) {
OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
Inst->getDebugLoc(), LLVMBB);
@@ -1532,7 +1510,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy() &&
!Inst->use_empty()) {
- unsigned &R = FuncInfo->ValueMap[Inst];
+ Register &R = FuncInfo->ValueMap[Inst];
if (!R)
R = FuncInfo->CreateRegs(Inst);
}
@@ -2234,14 +2212,14 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
return !findNonImmUse(Root, N.getNode(), U, IgnoreChains);
}
-void SelectionDAGISel::Select_INLINEASM(SDNode *N, bool Branch) {
+void SelectionDAGISel::Select_INLINEASM(SDNode *N) {
SDLoc DL(N);
std::vector<SDValue> Ops(N->op_begin(), N->op_end());
SelectInlineAsmMemoryOperands(Ops, DL);
const EVT VTs[] = {MVT::Other, MVT::Glue};
- SDValue New = CurDAG->getNode(Branch ? ISD::INLINEASM_BR : ISD::INLINEASM, DL, VTs, Ops);
+ SDValue New = CurDAG->getNode(N->getOpcode(), DL, VTs, Ops);
New->setNodeId(-1);
ReplaceUses(N, New.getNode());
CurDAG->RemoveDeadNode(N);
@@ -2285,6 +2263,14 @@ void SelectionDAGISel::Select_UNDEF(SDNode *N) {
CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
}
+void SelectionDAGISel::Select_FREEZE(SDNode *N) {
+  // TODO: We don't have a FREEZE pseudo-instruction at the MachineInstr level
+  // yet. If a FREEZE instruction is added later, the code below must be
+  // changed as well.
+ CurDAG->SelectNodeTo(N, TargetOpcode::COPY, N->getValueType(0),
+ N->getOperand(0));
+}
+
/// GetVBR - decode a vbr encoding whose top bit is set.
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t
GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
@@ -2804,13 +2790,13 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
return;
case ISD::AssertSext:
case ISD::AssertZext:
+ case ISD::AssertAlign:
ReplaceUses(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0));
CurDAG->RemoveDeadNode(NodeToMatch);
return;
case ISD::INLINEASM:
case ISD::INLINEASM_BR:
- Select_INLINEASM(NodeToMatch,
- NodeToMatch->getOpcode() == ISD::INLINEASM_BR);
+ Select_INLINEASM(NodeToMatch);
return;
case ISD::READ_REGISTER:
Select_READ_REGISTER(NodeToMatch);
@@ -2821,6 +2807,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::UNDEF:
Select_UNDEF(NodeToMatch);
return;
+ case ISD::FREEZE:
+ Select_FREEZE(NodeToMatch);
+ return;
}
assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
@@ -3693,12 +3682,11 @@ bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const {
// Detect when "or" is used to add an offset to a stack object.
if (auto *FN = dyn_cast<FrameIndexSDNode>(N->getOperand(0))) {
MachineFrameInfo &MFI = MF->getFrameInfo();
- unsigned A = MFI.getObjectAlignment(FN->getIndex());
- assert(isPowerOf2_32(A) && "Unexpected alignment");
+ Align A = MFI.getObjectAlign(FN->getIndex());
int32_t Off = C->getSExtValue();
// If the alleged offset fits in the zero bits guaranteed by
// the alignment, then this or is really an add.
- return (Off >= 0) && (((A - 1) & Off) == unsigned(Off));
+ return (Off >= 0) && (((A.value() - 1) & Off) == unsigned(Off));
}
return false;
}
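
A worked sketch of the alignment argument used in isOrEquivalentToAdd above: if the frame object is aligned to a power of two A and the non-negative offset has no bits outside the low log2(A) positions, then the base address and the offset share no set bits, so or-ing them equals adding them. A minimal standalone check (plain integers, illustrative only):

  #include <cassert>
  #include <cstdint>

  // True if or'ing Off into any address aligned to A (a power of two) is the
  // same as adding it: the offset must fit in the zero bits guaranteed by the
  // alignment.
  bool orIsAdd(uint64_t A, int64_t Off) {
    return Off >= 0 &&
           ((A - 1) & static_cast<uint64_t>(Off)) == static_cast<uint64_t>(Off);
  }

  int main() {
    // A 16-byte aligned stack slot: offsets 0..15 can be or'ed in safely.
    assert(orIsAdd(16, 12));   // 0b1100 fits in the four guaranteed zero bits
    assert(!orIsAdd(16, 16));  // would carry into the aligned bits
    uint64_t Base = 0x1000;    // 16-byte aligned address
    assert((Base | 12) == Base + 12);
    return 0;
  }
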
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index cdc09d59f6a49..059a6baf967af 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -70,7 +70,7 @@ namespace llvm {
}
static std::string getGraphName(const SelectionDAG *G) {
- return G->getMachineFunction().getName();
+ return std::string(G->getMachineFunction().getName());
}
static bool renderGraphFromBottomUp() {
@@ -164,6 +164,20 @@ void SelectionDAG::viewGraph() {
viewGraph("");
}
+/// Just dump the dot graph to a user-provided path and title.
+/// This doesn't open the dot viewer program and
+/// helps visualization when outside a debugging session.
+/// FileName expects an absolute path. If provided
+/// without any path separators then the file
+/// will be created in the current directory.
+/// An error will be emitted if the path is invalid.
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void SelectionDAG::dumpDotGraph(const Twine &FileName,
+ const Twine &Title) {
+ dumpDotGraphToFile(this, FileName, Title);
+}
+#endif
+
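
A hypothetical usage note for the helper added above: unlike viewGraph, dumpDotGraph only writes the .dot file, so it can be called from temporary instrumentation or from a debugger even when no viewer is available. Inside SelectionDAGISel one might temporarily add something like the line below (illustrative only; it builds only inside LLVM and only when dumps are enabled):

  CurDAG->dumpDotGraph("/tmp/dag-before-isel.dot", "dag-before-isel");
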
/// clearGraphAttrs - Clear all previously defined node graph attributes.
/// Intended to be used from a debugging tool (eg. gdb).
void SelectionDAG::clearGraphAttrs() {
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index c628f379e415d..2cb57c1d1ccc8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -41,6 +42,7 @@
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -61,6 +63,10 @@ STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered");
STATISTIC(StatepointMaxSlotsRequired,
"Maximum number of stack slots required for a singe statepoint");
+cl::opt<bool> UseRegistersForDeoptValues(
+ "use-registers-for-deopt-values", cl::Hidden, cl::init(false),
+ cl::desc("Allow using registers for non pointer deopt args"));
+
static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops,
SelectionDAGBuilder &Builder, uint64_t Value) {
SDLoc L = Builder.getCurSDLoc();
@@ -215,6 +221,28 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
return None;
}
+
+/// Return true if-and-only-if the given SDValue can be lowered as either a
+/// constant argument or a stack reference. The key point is that the value
+/// doesn't need to be spilled or tracked as a vreg use.
+static bool willLowerDirectly(SDValue Incoming) {
+ // We are making an unchecked assumption that the frame size <= 2^16 as that
+ // is the largest offset which can be encoded in the stackmap format.
+ if (isa<FrameIndexSDNode>(Incoming))
+ return true;
+
+ // The largest constant describeable in the StackMap format is 64 bits.
+  // Potential Optimization: Constant values are sign extended by the
+  // consumer, and thus there are many constants of static type > 64 bits
+  // whose value happens to be sext(Con64) and could thus be lowered directly.
+ if (Incoming.getValueType().getSizeInBits() > 64)
+ return false;
+
+ return (isa<ConstantSDNode>(Incoming) || isa<ConstantFPSDNode>(Incoming) ||
+ Incoming.isUndef());
+}
+
+
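
The "Potential Optimization" note above points out that a constant wider than 64 bits can still be encoded directly whenever its value is the sign-extension of a 64-bit value, because the stackmap consumer sign-extends what it reads. A standalone sketch of that recoverability check, using __int128 as a stand-in for a wide constant (illustrative only, not LLVM API; assumes the usual GCC/Clang two's-complement truncation on narrowing):

  #include <cassert>
  #include <cstdint>

  // True if the 128-bit value V round-trips through a sign-extended 64-bit
  // constant, i.e. it could be recorded in a 64-bit stackmap slot and
  // recovered by a consumer that sign-extends.
  bool fitsAsSExt64(__int128 V) {
    int64_t Low = static_cast<int64_t>(V);   // truncate to the low 64 bits
    return static_cast<__int128>(Low) == V;  // sign-extend and compare
  }

  int main() {
    assert(fitsAsSExt64(-1));                           // all-ones i128
    assert(fitsAsSExt64(42));
    __int128 Big = static_cast<__int128>(1) << 100;     // needs > 64 bits
    assert(!fitsAsSExt64(Big));
    return 0;
  }
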
/// Try to find existing copies of the incoming values in stack slots used for
/// statepoint spilling. If we can find a spill slot for the incoming value,
/// mark that slot as allocated, and reuse the same slot for this safepoint.
@@ -224,11 +252,10 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue,
SelectionDAGBuilder &Builder) {
SDValue Incoming = Builder.getValue(IncomingValue);
- if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) {
- // We won't need to spill this, so no need to check for previously
- // allocated stack slots
+ // If we won't spill this, we don't need to check for previously allocated
+ // stack slots.
+ if (willLowerDirectly(Incoming))
return;
- }
SDValue OldLocation = Builder.StatepointLowering.getLocation(Incoming);
if (OldLocation.getNode())
@@ -268,45 +295,6 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue,
Builder.StatepointLowering.setLocation(Incoming, Loc);
}
-/// Remove any duplicate (as SDValues) from the derived pointer pairs. This
-/// is not required for correctness. It's purpose is to reduce the size of
-/// StackMap section. It has no effect on the number of spill slots required
-/// or the actual lowering.
-static void
-removeDuplicateGCPtrs(SmallVectorImpl<const Value *> &Bases,
- SmallVectorImpl<const Value *> &Ptrs,
- SmallVectorImpl<const GCRelocateInst *> &Relocs,
- SelectionDAGBuilder &Builder,
- FunctionLoweringInfo::StatepointSpillMap &SSM) {
- DenseMap<SDValue, const Value *> Seen;
-
- SmallVector<const Value *, 64> NewBases, NewPtrs;
- SmallVector<const GCRelocateInst *, 64> NewRelocs;
- for (size_t i = 0, e = Ptrs.size(); i < e; i++) {
- SDValue SD = Builder.getValue(Ptrs[i]);
- auto SeenIt = Seen.find(SD);
-
- if (SeenIt == Seen.end()) {
- // Only add non-duplicates
- NewBases.push_back(Bases[i]);
- NewPtrs.push_back(Ptrs[i]);
- NewRelocs.push_back(Relocs[i]);
- Seen[SD] = Ptrs[i];
- } else {
- // Duplicate pointer found, note in SSM and move on:
- SSM.DuplicateMap[Ptrs[i]] = SeenIt->second;
- }
- }
- assert(Bases.size() >= NewBases.size());
- assert(Ptrs.size() >= NewPtrs.size());
- assert(Relocs.size() >= NewRelocs.size());
- Bases = NewBases;
- Ptrs = NewPtrs;
- Relocs = NewRelocs;
- assert(Ptrs.size() == Bases.size());
- assert(Ptrs.size() == Relocs.size());
-}
-
/// Extract call from statepoint, lower it and return pointer to the
/// call node. Also update NodeMap so that getValue(statepoint) will
/// reference lowered call result
@@ -353,9 +341,9 @@ static MachineMemOperand* getMachineMemOperand(MachineFunction &MF,
auto MMOFlags = MachineMemOperand::MOStore |
MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
auto &MFI = MF.getFrameInfo();
- return MF.getMachineMemOperand(PtrInfo, MMOFlags,
+ return MF.getMachineMemOperand(PtrInfo, MMOFlags,
MFI.getObjectSize(FI.getIndex()),
- MFI.getObjectAlignment(FI.getIndex()));
+ MFI.getObjectAlign(FI.getIndex()));
}
/// Spill a value incoming to the statepoint. It might be either part of
@@ -393,10 +381,9 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
// slots with preferred alignments larger than frame alignment..
auto &MF = Builder.DAG.getMachineFunction();
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
- auto *StoreMMO =
- MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
- MFI.getObjectSize(Index),
- MFI.getObjectAlignment(Index));
+ auto *StoreMMO = MF.getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(Index),
+ MFI.getObjectAlign(Index));
Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc,
StoreMMO);
@@ -412,59 +399,81 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
/// Lower a single value incoming to a statepoint node. This value can be
/// either a deopt value or a gc value, the handling is the same. We special
/// case constants and allocas, then fall back to spilling if required.
-static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
- SmallVectorImpl<SDValue> &Ops,
- SmallVectorImpl<MachineMemOperand*> &MemRefs,
- SelectionDAGBuilder &Builder) {
- // Note: We know all of these spills are independent, but don't bother to
- // exploit that chain wise. DAGCombine will happily do so as needed, so
- // doing it here would be a small compile time win at most.
- SDValue Chain = Builder.getRoot();
-
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) {
+static void
+lowerIncomingStatepointValue(SDValue Incoming, bool RequireSpillSlot,
+ SmallVectorImpl<SDValue> &Ops,
+ SmallVectorImpl<MachineMemOperand *> &MemRefs,
+ SelectionDAGBuilder &Builder) {
+
+ if (willLowerDirectly(Incoming)) {
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
+ // This handles allocas as arguments to the statepoint (this is only
+ // really meaningful for a deopt value. For GC, we'd be trying to
+ // relocate the address of the alloca itself?)
+ assert(Incoming.getValueType() == Builder.getFrameIndexTy() &&
+ "Incoming value is a frame index!");
+ Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
+ Builder.getFrameIndexTy()));
+
+ auto &MF = Builder.DAG.getMachineFunction();
+ auto *MMO = getMachineMemOperand(MF, *FI);
+ MemRefs.push_back(MMO);
+ return;
+ }
+
+ assert(Incoming.getValueType().getSizeInBits() <= 64);
+
+ if (Incoming.isUndef()) {
+      // Put an easily recognized constant that's unlikely to be a valid
+      // value so that uses of undef by the consumer of the stackmap are
+      // easily recognized. This is legal since the compiler is always
+      // allowed to choose an arbitrary value for undef.
+ pushStackMapConstant(Ops, Builder, 0xFEFEFEFE);
+ return;
+ }
+
// If the original value was a constant, make sure it gets recorded as
// such in the stackmap. This is required so that the consumer can
// parse any internal format to the deopt state. It also handles null
- // pointers and other constant pointers in GC states. Note the constant
- // vectors do not appear to actually hit this path and that anything larger
- // than an i64 value (not type!) will fail asserts here.
- pushStackMapConstant(Ops, Builder, C->getSExtValue());
- } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
- // This handles allocas as arguments to the statepoint (this is only
- // really meaningful for a deopt value. For GC, we'd be trying to
- // relocate the address of the alloca itself?)
- assert(Incoming.getValueType() == Builder.getFrameIndexTy() &&
- "Incoming value is a frame index!");
- Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
- Builder.getFrameIndexTy()));
+ // pointers and other constant pointers in GC states.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) {
+ pushStackMapConstant(Ops, Builder, C->getSExtValue());
+ return;
+ } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Incoming)) {
+ pushStackMapConstant(Ops, Builder,
+ C->getValueAPF().bitcastToAPInt().getZExtValue());
+ return;
+ }
- auto &MF = Builder.DAG.getMachineFunction();
- auto *MMO = getMachineMemOperand(MF, *FI);
- MemRefs.push_back(MMO);
-
- } else if (LiveInOnly) {
+ llvm_unreachable("unhandled direct lowering case");
+ }
+
+ if (!RequireSpillSlot) {
// If this value is live in (not live-on-return, or live-through), we can
// treat it the same way patchpoint treats it's "live in" values. We'll
// end up folding some of these into stack references, but they'll be
// handled by the register allocator. Note that we do not have the notion
// of a late use so these values might be placed in registers which are
- // clobbered by the call. This is fine for live-in.
+    // clobbered by the call. This is fine for live-in. For live-through
+    // values, a fix-up pass should be executed to force spilling of such
+    // registers.
Ops.push_back(Incoming);
} else {
- // Otherwise, locate a spill slot and explicitly spill it so it
- // can be found by the runtime later. We currently do not support
- // tracking values through callee saved registers to their eventual
- // spill location. This would be a useful optimization, but would
- // need to be optional since it requires a lot of complexity on the
- // runtime side which not all would support.
+ // Otherwise, locate a spill slot and explicitly spill it so it can be
+ // found by the runtime later. Note: We know all of these spills are
+ // independent, but don't bother to exploit that chain wise. DAGCombine
+ // will happily do so as needed, so doing it here would be a small compile
+ // time win at most.
+ SDValue Chain = Builder.getRoot();
auto Res = spillIncomingStatepointValue(Incoming, Chain, Builder);
Ops.push_back(std::get<0>(Res));
if (auto *MMO = std::get<2>(Res))
MemRefs.push_back(MMO);
Chain = std::get<1>(Res);;
+ Builder.DAG.setRoot(Chain);
}
- Builder.DAG.setRoot(Chain);
}
/// Lower deopt state and gc pointer arguments of the statepoint. The actual
@@ -522,8 +531,18 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
const bool LiveInDeopt =
SI.StatepointFlags & (uint64_t)StatepointFlags::DeoptLiveIn;
- auto isGCValue =[&](const Value *V) {
- return is_contained(SI.Ptrs, V) || is_contained(SI.Bases, V);
+ auto isGCValue = [&](const Value *V) {
+ auto *Ty = V->getType();
+ if (!Ty->isPtrOrPtrVectorTy())
+ return false;
+ if (auto *GFI = Builder.GFI)
+ if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty))
+ return *IsManaged;
+ return true; // conservative
+ };
+
+ auto requireSpillSlot = [&](const Value *V) {
+ return !(LiveInDeopt || UseRegistersForDeoptValues) || isGCValue(V);
};
// Before we actually start lowering (and allocating spill slots for values),
@@ -532,7 +551,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// doesn't change semantics at all. It is important for performance that we
// reserve slots for both deopt and gc values before lowering either.
for (const Value *V : SI.DeoptState) {
- if (!LiveInDeopt || isGCValue(V))
+ if (requireSpillSlot(V))
reservePreviousStackSlotForValue(V, Builder);
}
for (unsigned i = 0; i < SI.Bases.size(); ++i) {
@@ -559,8 +578,8 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
}
if (!Incoming.getNode())
Incoming = Builder.getValue(V);
- const bool LiveInValue = LiveInDeopt && !isGCValue(V);
- lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, MemRefs, Builder);
+ lowerIncomingStatepointValue(Incoming, requireSpillSlot(V), Ops, MemRefs,
+ Builder);
}
// Finally, go ahead and lower all the gc arguments. There's no prefixed
@@ -570,12 +589,14 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// (base[0], ptr[0], base[1], ptr[1], ...)
for (unsigned i = 0; i < SI.Bases.size(); ++i) {
const Value *Base = SI.Bases[i];
- lowerIncomingStatepointValue(Builder.getValue(Base), /*LiveInOnly*/ false,
- Ops, MemRefs, Builder);
+ lowerIncomingStatepointValue(Builder.getValue(Base),
+ /*RequireSpillSlot*/ true, Ops, MemRefs,
+ Builder);
const Value *Ptr = SI.Ptrs[i];
- lowerIncomingStatepointValue(Builder.getValue(Ptr), /*LiveInOnly*/ false,
- Ops, MemRefs, Builder);
+ lowerIncomingStatepointValue(Builder.getValue(Ptr),
+ /*RequireSpillSlot*/ true, Ops, MemRefs,
+ Builder);
}
// If there are any explicit spill slots passed to the statepoint, record
@@ -610,7 +631,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
SDValue Loc = Builder.StatepointLowering.getLocation(SDV);
if (Loc.getNode()) {
- SpillMap.SlotMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
+ SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
} else {
// Record value as visited, but not spilled. This is case for allocas
// and constants. For this values we can avoid emitting spill load while
@@ -618,7 +639,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// Actually we do not need to record them in this map at all.
// We do this only to check that we are not relocating any unvisited
// value.
- SpillMap.SlotMap[V] = None;
+ SpillMap[V] = None;
// Default llvm mechanisms for exporting values which are used in
// different basic blocks does not work for gc relocates.
@@ -641,24 +662,15 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
NumOfStatepoints++;
// Clear state
StatepointLowering.startNewStatepoint(*this);
+ assert(SI.Bases.size() == SI.Ptrs.size() &&
+ SI.Ptrs.size() <= SI.GCRelocates.size());
#ifndef NDEBUG
- // We schedule gc relocates before removeDuplicateGCPtrs since we _will_
- // encounter the duplicate gc relocates we elide in removeDuplicateGCPtrs.
for (auto *Reloc : SI.GCRelocates)
if (Reloc->getParent() == SI.StatepointInstr->getParent())
StatepointLowering.scheduleRelocCall(*Reloc);
#endif
- // Remove any redundant llvm::Values which map to the same SDValue as another
- // input. Also has the effect of removing duplicates in the original
- // llvm::Value input list as well. This is a useful optimization for
- // reducing the size of the StackMap section. It has no other impact.
- removeDuplicateGCPtrs(SI.Bases, SI.Ptrs, SI.GCRelocates, *this,
- FuncInfo.StatepointSpillMaps[SI.StatepointInstr]);
- assert(SI.Bases.size() == SI.Ptrs.size() &&
- SI.Ptrs.size() == SI.GCRelocates.size());
-
// Lower statepoint vmstate and gcstate arguments
SmallVector<SDValue, 10> LoweredMetaArgs;
SmallVector<MachineMemOperand*, 16> MemRefs;
@@ -830,97 +842,109 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
}
void
-SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
+SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I,
const BasicBlock *EHPadBB /*= nullptr*/) {
- assert(ISP.getCall()->getCallingConv() != CallingConv::AnyReg &&
+ assert(I.getCallingConv() != CallingConv::AnyReg &&
"anyregcc is not supported on statepoints!");
#ifndef NDEBUG
- // If this is a malformed statepoint, report it early to simplify debugging.
- // This should catch any IR level mistake that's made when constructing or
- // transforming statepoints.
- ISP.verify();
-
// Check that the associated GCStrategy expects to encounter statepoints.
assert(GFI->getStrategy().useStatepoints() &&
"GCStrategy does not expect to encounter statepoints");
#endif
SDValue ActualCallee;
+ SDValue Callee = getValue(I.getActualCalledOperand());
- if (ISP.getNumPatchBytes() > 0) {
+ if (I.getNumPatchBytes() > 0) {
// If we've been asked to emit a nop sequence instead of a call instruction
// for this statepoint then don't lower the call target, but use a constant
- // `null` instead. Not lowering the call target lets statepoint clients get
- // away without providing a physical address for the symbolic call target at
- // link time.
-
- const auto &TLI = DAG.getTargetLoweringInfo();
- const auto &DL = DAG.getDataLayout();
-
- unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace();
- ActualCallee = DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(DL, AS));
+ // `undef` instead. Not lowering the call target lets statepoint clients
+ // get away without providing a physical address for the symbolic call
+ // target at link time.
+ ActualCallee = DAG.getUNDEF(Callee.getValueType());
} else {
- ActualCallee = getValue(ISP.getCalledValue());
+ ActualCallee = Callee;
}
StatepointLoweringInfo SI(DAG);
- populateCallLoweringInfo(SI.CLI, ISP.getCall(),
- ImmutableStatepoint::CallArgsBeginPos,
- ISP.getNumCallArgs(), ActualCallee,
- ISP.getActualReturnType(), false /* IsPatchPoint */);
-
- for (const GCRelocateInst *Relocate : ISP.getRelocates()) {
+ populateCallLoweringInfo(SI.CLI, &I, GCStatepointInst::CallArgsBeginPos,
+ I.getNumCallArgs(), ActualCallee,
+ I.getActualReturnType(), false /* IsPatchPoint */);
+
+ // There may be duplication in the gc.relocate list; such as two copies of
+ // each relocation on normal and exceptional path for an invoke. We only
+ // need to spill once and record one copy in the stackmap, but we need to
+ // reload once per gc.relocate. (Dedupping gc.relocates is trickier and best
+ // handled as a CSE problem elsewhere.)
+  // TODO: There are a couple of major stackmap size optimizations we could do
+ // here if we wished.
+ // 1) If we've encountered a derived pair {B, D}, we don't need to actually
+ // record {B,B} if it's seen later.
+ // 2) Due to rematerialization, actual derived pointers are somewhat rare;
+ // given that, we could change the format to record base pointer relocations
+ // separately with half the space. This would require a format rev and a
+ // fairly major rework of the STATEPOINT node though.
+ SmallSet<SDValue, 8> Seen;
+ for (const GCRelocateInst *Relocate : I.getGCRelocates()) {
SI.GCRelocates.push_back(Relocate);
- SI.Bases.push_back(Relocate->getBasePtr());
- SI.Ptrs.push_back(Relocate->getDerivedPtr());
+
+ SDValue DerivedSD = getValue(Relocate->getDerivedPtr());
+ if (Seen.insert(DerivedSD).second) {
+ SI.Bases.push_back(Relocate->getBasePtr());
+ SI.Ptrs.push_back(Relocate->getDerivedPtr());
+ }
}
- SI.GCArgs = ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end());
- SI.StatepointInstr = ISP.getInstruction();
- SI.GCTransitionArgs =
- ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end());
- SI.ID = ISP.getID();
- SI.DeoptState = ArrayRef<const Use>(ISP.deopt_begin(), ISP.deopt_end());
- SI.StatepointFlags = ISP.getFlags();
- SI.NumPatchBytes = ISP.getNumPatchBytes();
+ SI.GCArgs = ArrayRef<const Use>(I.gc_args_begin(), I.gc_args_end());
+ SI.StatepointInstr = &I;
+ SI.ID = I.getID();
+
+ SI.DeoptState = ArrayRef<const Use>(I.deopt_begin(), I.deopt_end());
+ SI.GCTransitionArgs = ArrayRef<const Use>(I.gc_transition_args_begin(),
+ I.gc_transition_args_end());
+
+ SI.StatepointFlags = I.getFlags();
+ SI.NumPatchBytes = I.getNumPatchBytes();
SI.EHPadBB = EHPadBB;
SDValue ReturnValue = LowerAsSTATEPOINT(SI);
// Export the result value if needed
- const GCResultInst *GCResult = ISP.getGCResult();
- Type *RetTy = ISP.getActualReturnType();
- if (!RetTy->isVoidTy() && GCResult) {
- if (GCResult->getParent() != ISP.getCall()->getParent()) {
- // Result value will be used in a different basic block so we need to
- // export it now. Default exporting mechanism will not work here because
- // statepoint call has a different type than the actual call. It means
- // that by default llvm will create export register of the wrong type
- // (always i32 in our case). So instead we need to create export register
- // with correct type manually.
- // TODO: To eliminate this problem we can remove gc.result intrinsics
- // completely and make statepoint call to return a tuple.
- unsigned Reg = FuncInfo.CreateRegs(RetTy);
- RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
- DAG.getDataLayout(), Reg, RetTy,
- ISP.getCall()->getCallingConv());
- SDValue Chain = DAG.getEntryNode();
-
- RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr);
- PendingExports.push_back(Chain);
- FuncInfo.ValueMap[ISP.getInstruction()] = Reg;
- } else {
- // Result value will be used in a same basic block. Don't export it or
- // perform any explicit register copies.
- // We'll replace the actuall call node shortly. gc_result will grab
- // this value.
- setValue(ISP.getInstruction(), ReturnValue);
- }
- } else {
- // The token value is never used from here on, just generate a poison value
- setValue(ISP.getInstruction(), DAG.getIntPtrConstant(-1, getCurSDLoc()));
+ const GCResultInst *GCResult = I.getGCResult();
+ Type *RetTy = I.getActualReturnType();
+
+ if (RetTy->isVoidTy() || !GCResult) {
+ // The return value is not needed, just generate a poison value.
+ setValue(&I, DAG.getIntPtrConstant(-1, getCurSDLoc()));
+ return;
+ }
+
+ if (GCResult->getParent() == I.getParent()) {
+    // Result value will be used in the same basic block. Don't export it or
+    // perform any explicit register copies. The gc_result will simply grab
+    // this value.
+ setValue(&I, ReturnValue);
+ return;
}
+
+  // Result value will be used in a different basic block so we need to export
+  // it now. The default exporting mechanism will not work here because the
+  // statepoint call has a different type than the actual call. It means that
+  // by default llvm will create an export register of the wrong type (always
+  // i32 in our case). So instead we need to create an export register with
+  // the correct type manually.
+ // TODO: To eliminate this problem we can remove gc.result intrinsics
+ // completely and make statepoint call to return a tuple.
+ unsigned Reg = FuncInfo.CreateRegs(RetTy);
+ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
+ DAG.getDataLayout(), Reg, RetTy,
+ I.getCallingConv());
+ SDValue Chain = DAG.getEntryNode();
+
+ RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr);
+ PendingExports.push_back(Chain);
+ FuncInfo.ValueMap[&I] = Reg;
}
void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
@@ -966,26 +990,23 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundle(
void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) {
// The result value of the gc_result is simply the result of the actual
// call. We've already emitted this, so just grab the value.
- const Instruction *I = CI.getStatepoint();
-
- if (I->getParent() != CI.getParent()) {
- // Statepoint is in different basic block so we should have stored call
- // result in a virtual register.
- // We can not use default getValue() functionality to copy value from this
- // register because statepoint and actual call return types can be
- // different, and getValue() will use CopyFromReg of the wrong type,
- // which is always i32 in our case.
- PointerType *CalleeType = cast<PointerType>(
- ImmutableStatepoint(I).getCalledValue()->getType());
- Type *RetTy =
- cast<FunctionType>(CalleeType->getElementType())->getReturnType();
- SDValue CopyFromReg = getCopyFromRegs(I, RetTy);
-
- assert(CopyFromReg.getNode());
- setValue(&CI, CopyFromReg);
- } else {
- setValue(&CI, getValue(I));
+ const GCStatepointInst *SI = CI.getStatepoint();
+
+ if (SI->getParent() == CI.getParent()) {
+ setValue(&CI, getValue(SI));
+ return;
}
+  // The statepoint is in a different basic block, so we should have stored
+  // the call result in a virtual register.
+  // We cannot use the default getValue() functionality to copy the value
+  // from this register because the statepoint and the actual call return
+  // types can be different, and getValue() will use CopyFromReg of the wrong
+  // type, which is always i32 in our case.
+ Type *RetTy = SI->getActualReturnType();
+ SDValue CopyFromReg = getCopyFromRegs(SI, RetTy);
+
+ assert(CopyFromReg.getNode());
+ setValue(&CI, CopyFromReg);
}
void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
@@ -1005,6 +1026,13 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
const Value *DerivedPtr = Relocate.getDerivedPtr();
SDValue SD = getValue(DerivedPtr);
+ if (SD.isUndef() && SD.getValueType().getSizeInBits() <= 64) {
+    // Lower relocate(undef) as an arbitrary constant. The current constant
+    // value is chosen such that it's unlikely to be a valid pointer.
+ setValue(&Relocate, DAG.getTargetConstant(0xFEFEFEFE, SDLoc(SD), MVT::i64));
+ return;
+ }
+
auto &SpillMap = FuncInfo.StatepointSpillMaps[Relocate.getStatepoint()];
auto SlotIt = SpillMap.find(DerivedPtr);
assert(SlotIt != SpillMap.end() && "Relocating not lowered gc value");
@@ -1020,26 +1048,27 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
unsigned Index = *DerivedPtrLocation;
SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy());
- // Note: We know all of these reloads are independent, but don't bother to
- // exploit that chain wise. DAGCombine will happily do so as needed, so
- // doing it here would be a small compile time win at most.
- SDValue Chain = getRoot();
+  // All the reloads are independent and are reading memory only modified by
+  // statepoints (i.e. no other aliasing stores); informing SelectionDAG of
+  // this lets CSE kick in for free and allows reordering of instructions
+  // if possible. The lowering for statepoint sets the root, so this orders
+  // all reloads with either a) the statepoint node itself, or b) the entry
+  // of the current block for an invoke statepoint.
+ const SDValue Chain = DAG.getRoot(); // != Builder.getRoot()
auto &MF = DAG.getMachineFunction();
auto &MFI = MF.getFrameInfo();
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
- auto *LoadMMO =
- MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
- MFI.getObjectSize(Index),
- MFI.getObjectAlignment(Index));
+ auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+ MFI.getObjectSize(Index),
+ MFI.getObjectAlign(Index));
auto LoadVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
Relocate.getType());
SDValue SpillLoad = DAG.getLoad(LoadVT, getCurSDLoc(), Chain,
SpillSlot, LoadMMO);
-
- DAG.setRoot(SpillLoad.getValue(1));
+ PendingLoads.push_back(SpillLoad.getValue(1));
assert(SpillLoad.getNode());
setValue(&Relocate, SpillLoad);
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
index 70507932681d0..634ef87f3840e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
@@ -15,11 +15,9 @@
#define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>
namespace llvm {
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 24ab65171a179..96df20039b15d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -83,7 +83,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
const CCValAssign &ArgLoc = ArgLocs[I];
if (!ArgLoc.isRegLoc())
continue;
- Register Reg = ArgLoc.getLocReg();
+ MCRegister Reg = ArgLoc.getLocReg();
// Only look at callee saved registers.
if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
continue;
@@ -93,7 +93,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
SDValue Value = OutVals[I];
if (Value->getOpcode() != ISD::CopyFromReg)
return false;
- unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
+ MCRegister ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
if (MRI.getLiveInPhysReg(ArgReg) != Reg)
return false;
}
@@ -110,14 +110,18 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
+ IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
- Alignment = Call->getParamAlignment(ArgIdx);
+ Alignment = Call->getParamAlign(ArgIdx);
ByValType = nullptr;
- if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
+ if (IsByVal)
ByValType = Call->getParamByValType(ArgIdx);
+ PreallocatedType = nullptr;
+ if (IsPreallocated)
+ PreallocatedType = Call->getParamPreallocatedType(ArgIdx);
}
/// Generate a libcall taking the given operands as arguments and returning a
@@ -176,38 +180,24 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
return LowerCallTo(CLI);
}
-bool
-TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
- unsigned Limit, uint64_t Size,
- unsigned DstAlign, unsigned SrcAlign,
- bool IsMemset,
- bool ZeroMemset,
- bool MemcpyStrSrc,
- bool AllowOverlap,
- unsigned DstAS, unsigned SrcAS,
- const AttributeList &FuncAttributes) const {
- // If 'SrcAlign' is zero, that means the memory operation does not need to
- // load the value, i.e. memset or memcpy from constant string. Otherwise,
- // it's the inferred alignment of the source. 'DstAlign', on the other hand,
- // is the specified alignment of the memory operation. If it is zero, that
- // means it's possible to change the alignment of the destination.
- // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
- // not need to be loaded.
- if (!(SrcAlign == 0 || SrcAlign >= DstAlign))
+bool TargetLowering::findOptimalMemOpLowering(
+ std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
+ unsigned SrcAS, const AttributeList &FuncAttributes) const {
+ if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
return false;
- EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
- IsMemset, ZeroMemset, MemcpyStrSrc,
- FuncAttributes);
+ EVT VT = getOptimalMemOpType(Op, FuncAttributes);
if (VT == MVT::Other) {
// Use the largest integer type whose alignment constraints are satisfied.
// We only need to check DstAlign here as SrcAlign is always greater or
// equal to DstAlign (or zero).
VT = MVT::i64;
- while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
- !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
- VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+ if (Op.isFixedDstAlign())
+ while (
+ Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
+ !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign().value()))
+ VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
assert(VT.isInteger());
// Find the largest legal integer type.
@@ -223,7 +213,8 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
}
unsigned NumMemOps = 0;
- while (Size != 0) {
+ uint64_t Size = Op.size();
+ while (Size) {
unsigned VTSize = VT.getSizeInBits() / 8;
while (VTSize > Size) {
// For now, only use non-vector load / store's for the left-over pieces.
@@ -257,9 +248,10 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
// If the new VT cannot cover all of the remaining bits, then consider
// issuing a (or a pair of) unaligned and overlapping load / store.
bool Fast;
- if (NumMemOps && AllowOverlap && NewVTSize < Size &&
- allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign,
- MachineMemOperand::MONone, &Fast) &&
+ if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
+ allowsMisalignedMemoryAccesses(
+ VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
+ MachineMemOperand::MONone, &Fast) &&
Fast)
VTSize = Size;
else {
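
The loop spanning the two hunks above covers the remaining size greedily with the widest type that still fits, halving the width for the left-over pieces (and, when overlap is allowed, finishing with one overlapping access instead). A standalone sketch of the basic greedy chunking, with plain byte widths standing in for EVTs and none of the legality, alignment, or overlap refinements (names are illustrative):

  #include <cassert>
  #include <cstdint>
  #include <vector>

  // Greedily split Size bytes into power-of-two chunks no wider than
  // MaxWidth, mirroring the "while (VTSize > Size) pick a narrower type"
  // structure above.
  std::vector<uint64_t> chunkMemOp(uint64_t Size, uint64_t MaxWidth) {
    std::vector<uint64_t> Chunks;
    uint64_t Width = MaxWidth;
    while (Size != 0) {
      while (Width > Size)
        Width /= 2;            // fall back to a narrower access
      Chunks.push_back(Width);
      Size -= Width;
    }
    return Chunks;
  }

  int main() {
    // 15 bytes copied with 8-byte registers: 8 + 4 + 2 + 1.
    std::vector<uint64_t> C = chunkMemOp(15, 8);
    assert((C == std::vector<uint64_t>{8, 4, 2, 1}));
    return 0;
  }
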
@@ -491,13 +483,15 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
-bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
+ const APInt &DemandedBits,
+ const APInt &DemandedElts,
TargetLoweringOpt &TLO) const {
SDLoc DL(Op);
unsigned Opcode = Op.getOpcode();
// Do target-specific constant optimization.
- if (targetShrinkDemandedConstant(Op, Demanded, TLO))
+ if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return TLO.New.getNode();
// FIXME: ISD::SELECT, ISD::SELECT_CC
@@ -513,12 +507,12 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
// If this is a 'not' op, don't touch it because that's a canonical form.
const APInt &C = Op1C->getAPIntValue();
- if (Opcode == ISD::XOR && Demanded.isSubsetOf(C))
+ if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
return false;
- if (!C.isSubsetOf(Demanded)) {
+ if (!C.isSubsetOf(DemandedBits)) {
EVT VT = Op.getValueType();
- SDValue NewC = TLO.DAG.getConstant(Demanded & C, DL, VT);
+ SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
return TLO.CombineTo(Op, NewOp);
}
@@ -530,6 +524,16 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
return false;
}
+bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
+ const APInt &DemandedBits,
+ TargetLoweringOpt &TLO) const {
+ EVT VT = Op.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
+}
+
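
The ShrinkDemandedConstant fold above rests on a simple masking identity: for AND, OR and XOR, replacing the constant C with C & DemandedBits can never change a demanded bit of the result. A minimal standalone sketch of that identity in plain C++ (native integer types, arbitrary illustrative mask and constant, not the SelectionDAG API):

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  const uint32_t Demanded = 0x0000FFFFu; // only the low 16 bits are used
  const uint32_t C = 0x12345678u;        // original constant operand
  const uint32_t ShrunkC = C & Demanded; // what the fold rewrites C to
  for (uint32_t X : {0u, 0xDEADBEEFu, 0xFFFFFFFFu, 0x0F0F0F0Fu}) {
    assert(((X & C) & Demanded) == ((X & ShrunkC) & Demanded));
    assert(((X | C) & Demanded) == ((X | ShrunkC) & Demanded));
    assert(((X ^ C) & Demanded) == ((X ^ ShrunkC) & Demanded));
  }
  return 0;
}
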
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
@@ -598,6 +602,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
unsigned Depth,
bool AssumeSingleUse) const {
EVT VT = Op.getValueType();
+
+ // TODO: We can probably do more work on calculating the known bits and
+ // simplifying the operations for scalable vectors, but for now we just
+ // bail out.
+ if (VT.isScalableVector()) {
+ // Pretend we don't know anything for now.
+ Known = KnownBits(DemandedBits.getBitWidth());
+ return false;
+ }
+
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
@@ -623,15 +637,18 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
return DAG.getUNDEF(Op.getValueType());
unsigned NumElts = DemandedElts.getBitWidth();
+ unsigned BitWidth = DemandedBits.getBitWidth();
KnownBits LHSKnown, RHSKnown;
switch (Op.getOpcode()) {
case ISD::BITCAST: {
SDValue Src = peekThroughBitcasts(Op.getOperand(0));
EVT SrcVT = Src.getValueType();
EVT DstVT = Op.getValueType();
+ if (SrcVT == DstVT)
+ return Src;
+
unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
-
if (NumSrcEltBits == NumDstEltBits)
if (SDValue V = SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedElts, DAG, Depth + 1))
@@ -719,6 +736,21 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
return Op.getOperand(1);
break;
}
+ case ISD::SHL: {
+ // If we are only demanding sign bits then we can use the shift source
+ // directly.
+ if (const APInt *MaxSA =
+ DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
+ SDValue Op0 = Op.getOperand(0);
+ unsigned ShAmt = MaxSA->getZExtValue();
+ unsigned NumSignBits =
+ DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
+ unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
+ if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
+ return Op0;
+ }
+ break;
+ }
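
The new ISD::SHL case returns the unshifted source when only sign bits are demanded. The reasoning: if Op0 has at least ShAmt + UpperDemandedBits sign bits, every demanded bit of (Op0 << ShAmt) still reads from the sign-copy region of Op0, so it matches the corresponding bit of Op0 itself. A rough standalone check with plain 32-bit values (illustrative constants, not the DAG API):

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  const unsigned ShAmt = 4;
  const uint32_t DemandedTop8 = 0xFF000000u; // UpperDemandedBits == 8
  // Sign-extended 8-bit values have at least 25 sign bits in an i32, so
  // 25 - ShAmt >= 8 and the shift cannot disturb the demanded top bits.
  for (int8_t Small : {int8_t(-5), int8_t(7), int8_t(-128), int8_t(0)}) {
    int32_t X = Small;
    uint32_t Shifted = uint32_t(X) << ShAmt;
    assert((Shifted & DemandedTop8) == (uint32_t(X) & DemandedTop8));
  }
  return 0;
}
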
case ISD::SETCC: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
@@ -727,7 +759,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
// width as the setcc result, and (3) the result of a setcc conforms to 0 or
// -1, we may be able to bypass the setcc.
if (DemandedBits.isSignMask() &&
- Op0.getScalarValueSizeInBits() == DemandedBits.getBitWidth() &&
+ Op0.getScalarValueSizeInBits() == BitWidth &&
getBooleanContents(Op0.getValueType()) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
// If we're testing X < 0, then this compare isn't needed - just use X!
@@ -742,9 +774,30 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
}
case ISD::SIGN_EXTEND_INREG: {
// If none of the extended bits are demanded, eliminate the sextinreg.
+ SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits())
- return Op.getOperand(0);
+ unsigned ExBits = ExVT.getScalarSizeInBits();
+ if (DemandedBits.getActiveBits() <= ExBits)
+ return Op0;
+ // If the input is already sign extended, just drop the extension.
+ unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
+ if (NumSignBits >= (BitWidth - ExBits + 1))
+ return Op0;
+ break;
+ }
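
The extra check added to the ISD::SIGN_EXTEND_INREG case uses the fact that sign-extending in-register is a no-op whenever the input already has enough sign bits (at least BitWidth - ExBits + 1). Sketched standalone with the usual shift emulation of sext_inreg (assumes two's-complement narrowing and an arithmetic right shift, which every mainstream compiler provides; not the LLVM helpers):

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Emulate sext_inreg(X, ExBits): sign-extend the low ExBits of X to 32 bits.
static int32_t SExtInReg(int32_t X, unsigned ExBits) {
  unsigned Shift = 32 - ExBits;
  return int32_t(uint32_t(X) << Shift) >> Shift;
}

int main() {
  const unsigned ExBits = 16;
  // Every value below already fits in a signed 16-bit integer, i.e. it has at
  // least 32 - 16 + 1 = 17 sign bits, so the sext_inreg changes nothing.
  for (int32_t X : {0, -1, 32767, -32768, 1234, -1234})
    assert(SExtInReg(X, ExBits) == X);
  return 0;
}
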
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG: {
+    // If we only want the lowest element and none of the extended bits, then we can
+ // return the bitcasted source vector.
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Op.getValueType();
+ if (DemandedElts == 1 && DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
+ DAG.getDataLayout().isLittleEndian() &&
+ DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
+ return DAG.getBitcast(DstVT, Src);
+ }
break;
}
case ISD::INSERT_VECTOR_ELT: {
@@ -757,6 +810,16 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
return Vec;
break;
}
+ case ISD::INSERT_SUBVECTOR: {
+ // If we don't demand the inserted subvector, return the base vector.
+ SDValue Vec = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ uint64_t Idx = Op.getConstantOperandVal(2);
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ if (DemandedElts.extractBits(NumSubElts, Idx) == 0)
+ return Vec;
+ break;
+ }
case ISD::VECTOR_SHUFFLE: {
ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
@@ -790,6 +853,25 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
return SDValue();
}
+SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
+ SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
+ unsigned Depth) const {
+ EVT VT = Op.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
+ Depth);
+}
+
+SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
+ SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
+ unsigned Depth) const {
+ APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
+ return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
+ Depth);
+}
+
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
@@ -805,6 +887,15 @@ bool TargetLowering::SimplifyDemandedBits(
assert(Op.getScalarValueSizeInBits() == BitWidth &&
"Mask size mismatches value type size!");
+ // Don't know anything.
+ Known = KnownBits(BitWidth);
+
+ // TODO: We can probably do more work on calculating the known bits and
+ // simplifying the operations for scalable vectors, but for now we just
+ // bail out.
+ if (Op.getValueType().isScalableVector())
+ return false;
+
unsigned NumElts = OriginalDemandedElts.getBitWidth();
assert((!Op.getValueType().isVector() ||
NumElts == Op.getValueType().getVectorNumElements()) &&
@@ -815,9 +906,6 @@ bool TargetLowering::SimplifyDemandedBits(
SDLoc dl(Op);
auto &DL = TLO.DAG.getDataLayout();
- // Don't know anything.
- Known = KnownBits(BitWidth);
-
// Undef operand.
if (Op.isUndef())
return false;
@@ -850,7 +938,7 @@ bool TargetLowering::SimplifyDemandedBits(
return false;
}
- KnownBits Known2, KnownOut;
+ KnownBits Known2;
switch (Op.getOpcode()) {
case ISD::TargetConstant:
llvm_unreachable("Can't simplify this node");
@@ -864,7 +952,11 @@ bool TargetLowering::SimplifyDemandedBits(
APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
return true;
- Known = SrcKnown.zextOrTrunc(BitWidth, false);
+
+ // Upper elements are undef, so only get the knownbits if we just demand
+ // the bottom element.
+ if (DemandedElts == 1)
+ Known = SrcKnown.anyextOrTrunc(BitWidth);
break;
}
case ISD::BUILD_VECTOR:
@@ -877,6 +969,12 @@ bool TargetLowering::SimplifyDemandedBits(
if (getTargetConstantFromLoad(LD)) {
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false; // Don't fall through, will infinitely loop.
+ } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
+ // If this is a ZEXTLoad and we are looking at the loaded value.
+ EVT MemVT = LD->getMemoryVT();
+ unsigned MemBits = MemVT.getScalarSizeInBits();
+ Known.Zero.setBitsFrom(MemBits);
+ return false; // Don't fall through, will infinitely loop.
}
break;
}
@@ -904,7 +1002,7 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
return true;
- Known = KnownScl.zextOrTrunc(BitWidth, false);
+ Known = KnownScl.anyextOrTrunc(BitWidth);
KnownBits KnownVec;
if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
@@ -919,57 +1017,75 @@ bool TargetLowering::SimplifyDemandedBits(
return false;
}
case ISD::INSERT_SUBVECTOR: {
- SDValue Base = Op.getOperand(0);
+    // Demand any elements from the subvector and the remainder from the src
+    // it's inserted into.
+ SDValue Src = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
- EVT SubVT = Sub.getValueType();
- unsigned NumSubElts = SubVT.getVectorNumElements();
-
- // If index isn't constant, assume we need the original demanded base
- // elements and ALL the inserted subvector elements.
- APInt BaseElts = DemandedElts;
- APInt SubElts = APInt::getAllOnesValue(NumSubElts);
- if (isa<ConstantSDNode>(Op.getOperand(2))) {
- const APInt &Idx = Op.getConstantOperandAPInt(2);
- if (Idx.ule(NumElts - NumSubElts)) {
- unsigned SubIdx = Idx.getZExtValue();
- SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
- BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
- }
- }
-
- KnownBits KnownSub, KnownBase;
- if (SimplifyDemandedBits(Sub, DemandedBits, SubElts, KnownSub, TLO,
+ uint64_t Idx = Op.getConstantOperandVal(2);
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ APInt DemandedSrcElts = DemandedElts;
+ DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+
+ KnownBits KnownSub, KnownSrc;
+ if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
Depth + 1))
return true;
- if (SimplifyDemandedBits(Base, DemandedBits, BaseElts, KnownBase, TLO,
+ if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
Depth + 1))
return true;
Known.Zero.setAllBits();
Known.One.setAllBits();
- if (!!SubElts) {
- Known.One &= KnownSub.One;
- Known.Zero &= KnownSub.Zero;
+ if (!!DemandedSubElts) {
+ Known.One &= KnownSub.One;
+ Known.Zero &= KnownSub.Zero;
}
- if (!!BaseElts) {
- Known.One &= KnownBase.One;
- Known.Zero &= KnownBase.Zero;
+ if (!!DemandedSrcElts) {
+ Known.One &= KnownSrc.One;
+ Known.Zero &= KnownSrc.Zero;
+ }
+
+ // Attempt to avoid multi-use src if we don't need anything from it.
+ if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() ||
+ !DemandedSrcElts.isAllOnesValue()) {
+ SDValue NewSub = SimplifyMultipleUseDemandedBits(
+ Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
+ SDValue NewSrc = SimplifyMultipleUseDemandedBits(
+ Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
+ if (NewSub || NewSrc) {
+ NewSub = NewSub ? NewSub : Sub;
+ NewSrc = NewSrc ? NewSrc : Src;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
+ Op.getOperand(2));
+ return TLO.CombineTo(Op, NewOp);
+ }
}
break;
}
case ISD::EXTRACT_SUBVECTOR: {
- // If index isn't constant, assume we need all the source vector elements.
+ // Offset the demanded elts by the subvector index.
SDValue Src = Op.getOperand(0);
- ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (Src.getValueType().isScalableVector())
+ break;
+ uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- APInt SrcElts = APInt::getAllOnesValue(NumSrcElts);
- if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
- // Offset the demanded elts by the subvector index.
- uint64_t Idx = SubIdx->getZExtValue();
- SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
- }
- if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1))
+ APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+
+ if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
+ Depth + 1))
return true;
+
+ // Attempt to avoid multi-use src if we don't need anything from it.
+ if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) {
+ SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
+ Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
+ if (DemandedSrc) {
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
+ Op.getOperand(1));
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
break;
}
case ISD::CONCAT_VECTORS: {
@@ -1069,7 +1185,8 @@ bool TargetLowering::SimplifyDemandedBits(
// If any of the set bits in the RHS are known zero on the LHS, shrink
// the constant.
- if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO))
+ if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
+ DemandedElts, TLO))
return true;
// Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
@@ -1117,16 +1234,14 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
// If the RHS is a constant, see if we can simplify it.
- if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO))
+ if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
+ TLO))
return true;
// If the operation can be done in a smaller type, do so.
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
- // Output known-1 bits are only known if set in both the LHS & RHS.
- Known.One &= Known2.One;
- // Output known-0 are known to be clear if zero in either the LHS | RHS.
- Known.Zero |= Known2.Zero;
+ Known &= Known2;
break;
}
case ISD::OR: {
@@ -1163,16 +1278,13 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
return TLO.CombineTo(Op, Op1);
// If the RHS is a constant, see if we can simplify it.
- if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
+ if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
// If the operation can be done in a smaller type, do so.
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
- // Output known-0 bits are only known if clear in both the LHS & RHS.
- Known.Zero &= Known2.Zero;
- // Output known-1 are known to be set if set in either the LHS | RHS.
- Known.One |= Known2.One;
+ Known |= Known2;
break;
}
case ISD::XOR: {
@@ -1218,12 +1330,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
- // Output known-0 bits are known if clear or set in both the LHS & RHS.
- KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
- // Output known-1 are known to be set if set in only one of the LHS, RHS.
- KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
-
- if (ConstantSDNode *C = isConstOrConstSplat(Op1)) {
+    ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
+ if (C) {
// If one side is a constant, and all of the known set bits on the other
// side are also set in the constant, turn this into an AND, as we know
// the bits will be cleared.
@@ -1238,19 +1346,20 @@ bool TargetLowering::SimplifyDemandedBits(
// If the RHS is a constant, see if we can change it. Don't alter a -1
// constant because that's a 'not' op, and that is better for combining
// and codegen.
- if (!C->isAllOnesValue()) {
- if (DemandedBits.isSubsetOf(C->getAPIntValue())) {
- // We're flipping all demanded bits. Flip the undemanded bits too.
- SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
- return TLO.CombineTo(Op, New);
- }
- // If we can't turn this into a 'not', try to shrink the constant.
- if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
- return true;
+ if (!C->isAllOnesValue() &&
+ DemandedBits.isSubsetOf(C->getAPIntValue())) {
+ // We're flipping all demanded bits. Flip the undemanded bits too.
+ SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
+ return TLO.CombineTo(Op, New);
}
}
- Known = std::move(KnownOut);
+ // If we can't turn this into a 'not', try to shrink the constant.
+ if (!C || !C->isAllOnesValue())
+ if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
+ return true;
+
+ Known ^= Known2;
break;
}
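
The reorganised XOR logic keeps the existing "turn it into a not" fold: when the constant covers every demanded bit, (X ^ C) behaves as ~X on those bits. A quick standalone illustration (arbitrary mask and constant, plain C++):

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  const uint32_t Demanded = 0x000000FFu;
  const uint32_t C = 0x0F0F00FFu; // DemandedBits.isSubsetOf(C) holds
  for (uint32_t X : {0u, 0xDEADBEEFu, 0xFFFFFFFFu, 0x12345678u})
    assert(((X ^ C) & Demanded) == (~X & Demanded));
  return 0;
}
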
case ISD::SELECT:
@@ -1264,7 +1373,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
- if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
+ if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
// Only known if known in both the LHS and RHS.
@@ -1282,7 +1391,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
- if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
+ if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
// Only known if known in both the LHS and RHS.
@@ -1320,12 +1429,10 @@ bool TargetLowering::SimplifyDemandedBits(
case ISD::SHL: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
+ EVT ShiftVT = Op1.getValueType();
- if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
- // If the shift count is an invalid immediate, don't do anything.
- if (SA->getAPIntValue().uge(BitWidth))
- break;
-
+ if (const APInt *SA =
+ TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
@@ -1336,37 +1443,25 @@ bool TargetLowering::SimplifyDemandedBits(
// TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::SRL) {
if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
- if (ConstantSDNode *SA2 =
- isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
- if (SA2->getAPIntValue().ult(BitWidth)) {
- unsigned C1 = SA2->getZExtValue();
- unsigned Opc = ISD::SHL;
- int Diff = ShAmt - C1;
- if (Diff < 0) {
- Diff = -Diff;
- Opc = ISD::SRL;
- }
-
- SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
- return TLO.CombineTo(
- Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
+ if (const APInt *SA2 =
+ TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
+ unsigned C1 = SA2->getZExtValue();
+ unsigned Opc = ISD::SHL;
+ int Diff = ShAmt - C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SRL;
}
+ SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
}
}
- if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts,
- Known, TLO, Depth + 1))
- return true;
-
- // Try shrinking the operation as long as the shift amount will still be
- // in range.
- if ((ShAmt < DemandedBits.getActiveBits()) &&
- ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
- return true;
-
// Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
// are not demanded. This will likely allow the anyext to be folded away.
+ // TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::ANY_EXTEND) {
SDValue InnerOp = Op0.getOperand(0);
EVT InnerVT = InnerOp.getValueType();
@@ -1382,22 +1477,24 @@ bool TargetLowering::SimplifyDemandedBits(
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
}
+
// Repeat the SHL optimization above in cases where an extension
// intervenes: (shl (anyext (shr x, c1)), c2) to
// (shl (anyext x), c2-c1). This requires that the bottom c1 bits
// aren't demanded (as above) and that the shifted upper c1 bits of
// x aren't demanded.
+ // TODO - support non-uniform vector amounts.
if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
InnerOp.hasOneUse()) {
- if (ConstantSDNode *SA2 =
- isConstOrConstSplat(InnerOp.getOperand(1))) {
- unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
+ if (const APInt *SA2 =
+ TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
+ unsigned InnerShAmt = SA2->getZExtValue();
if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
DemandedBits.getActiveBits() <=
(InnerBits - InnerShAmt + ShAmt) &&
DemandedBits.countTrailingZeros() >= ShAmt) {
- SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
- Op1.getValueType());
+ SDValue NewSA =
+ TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
InnerOp.getOperand(0));
return TLO.CombineTo(
@@ -1407,60 +1504,76 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
+ APInt InDemandedMask = DemandedBits.lshr(ShAmt);
+ if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero <<= ShAmt;
Known.One <<= ShAmt;
// low bits known zero.
Known.Zero.setLowBits(ShAmt);
+
+ // Try shrinking the operation as long as the shift amount will still be
+ // in range.
+ if ((ShAmt < DemandedBits.getActiveBits()) &&
+ ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
+ return true;
+ }
+
+ // If we are only demanding sign bits then we can use the shift source
+ // directly.
+ if (const APInt *MaxSA =
+ TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
+ unsigned ShAmt = MaxSA->getZExtValue();
+ unsigned NumSignBits =
+ TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
+ unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
+ if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
+ return TLO.CombineTo(Op, Op0);
}
break;
}
case ISD::SRL: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
+ EVT ShiftVT = Op1.getValueType();
- if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
- // If the shift count is an invalid immediate, don't do anything.
- if (SA->getAPIntValue().uge(BitWidth))
- break;
-
+ if (const APInt *SA =
+ TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
- EVT ShiftVT = Op1.getValueType();
- APInt InDemandedMask = (DemandedBits << ShAmt);
-
- // If the shift is exact, then it does demand the low bits (and knows that
- // they are zero).
- if (Op->getFlags().hasExact())
- InDemandedMask.setLowBits(ShAmt);
-
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
// single shift. We can do this if the top bits (which are shifted out)
// are never demanded.
// TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SA2 =
- isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
- if (!DemandedBits.intersects(
- APInt::getHighBitsSet(BitWidth, ShAmt))) {
- if (SA2->getAPIntValue().ult(BitWidth)) {
- unsigned C1 = SA2->getZExtValue();
- unsigned Opc = ISD::SRL;
- int Diff = ShAmt - C1;
- if (Diff < 0) {
- Diff = -Diff;
- Opc = ISD::SHL;
- }
-
- SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
- return TLO.CombineTo(
- Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
+ if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
+ if (const APInt *SA2 =
+ TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
+ unsigned C1 = SA2->getZExtValue();
+ unsigned Opc = ISD::SRL;
+ int Diff = ShAmt - C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SHL;
}
+ SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
}
}
+ APInt InDemandedMask = (DemandedBits << ShAmt);
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (Op->getFlags().hasExact())
+ InDemandedMask.setLowBits(ShAmt);
+
// Compute the new bits that are at the top now.
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
@@ -1468,14 +1581,22 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
Known.One.lshrInPlace(ShAmt);
-
- Known.Zero.setHighBits(ShAmt); // High bits known zero.
+ // High bits known zero.
+ Known.Zero.setHighBits(ShAmt);
}
break;
}
case ISD::SRA: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
+ EVT ShiftVT = Op1.getValueType();
+
+ // If we only want bits that already match the signbit then we don't need
+ // to shift.
+ unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
+ if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
+ NumHiDemandedBits)
+ return TLO.CombineTo(Op, Op0);
// If this is an arithmetic shift right and only the low-bit is set, we can
// always convert this into a logical shr, even if the shift amount is
@@ -1484,11 +1605,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.isOneValue())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
- if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
- // If the shift count is an invalid immediate, don't do anything.
- if (SA->getAPIntValue().uge(BitWidth))
- break;
-
+ if (const APInt *SA =
+ TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
if (ShAmt == 0)
return TLO.CombineTo(Op, Op0);
@@ -1525,14 +1643,23 @@ bool TargetLowering::SimplifyDemandedBits(
int Log2 = DemandedBits.exactLogBase2();
if (Log2 >= 0) {
// The bit must come from the sign.
- SDValue NewSA =
- TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op1.getValueType());
+ SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
}
if (Known.One[BitWidth - ShAmt - 1])
// New bits are known one.
Known.One.setHighBits(ShAmt);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0) {
+ SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
}
break;
}
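
The early-out added at the top of the ISD::SRA case mirrors the SHL one: if the source already has at least NumHiDemandedBits sign bits, an arithmetic shift right (by any amount) only moves copies of the sign bit into the demanded positions, so the shift can be dropped. A standalone sanity check with plain 32-bit values (assumes >> on a signed int is arithmetic, which holds on all mainstream compilers):

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  const uint32_t DemandedTop8 = 0xFF000000u; // NumHiDemandedBits == 8
  // Every value below has at least 8 sign bits.
  const int32_t Vals[] = {0x00FFFFFF, int32_t(0xFF000000), 0, -1, 42, -42};
  for (int32_t X : Vals)
    for (unsigned S : {1u, 7u, 19u, 31u})
      assert((uint32_t(X >> S) & DemandedTop8) == (uint32_t(X) & DemandedTop8));
  return 0;
}
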
@@ -1573,6 +1700,32 @@ bool TargetLowering::SimplifyDemandedBits(
Known.One |= Known2.One;
Known.Zero |= Known2.Zero;
}
+
+ // For pow-2 bitwidths we only demand the bottom modulo amt bits.
+ if (isPowerOf2_32(BitWidth)) {
+ APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
+ if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
+ Known2, TLO, Depth + 1))
+ return true;
+ }
+ break;
+ }
+ case ISD::ROTL:
+ case ISD::ROTR: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+    // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
+ if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
+ return TLO.CombineTo(Op, Op0);
+
+ // For pow-2 bitwidths we only demand the bottom modulo amt bits.
+ if (isPowerOf2_32(BitWidth)) {
+ APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
+ if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+ }
break;
}
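
Two facts drive the new ISD::ROTL/ISD::ROTR handling: rotating an all-zeros or all-ones value is a no-op, and for a power-of-two bit width the rotate amount only matters modulo the width, so only its low log2(BW) bits are demanded. Sketched standalone (plain C++, a hand-rolled 32-bit rotate stands in for the ISD node):

#include <cassert>
#include <cstdint>
#include <initializer_list>

static uint32_t RotL(uint32_t X, uint32_t S) {
  S %= 32; // a rotate only depends on the amount modulo the bit width
  return S ? (X << S) | (X >> (32 - S)) : X;
}

int main() {
  for (uint32_t S : {0u, 5u, 31u, 32u, 37u, 95u, 0xFFFFFFFFu}) {
    // Power-of-two width: S % 32 lives entirely in the low 5 bits of S, so
    // the upper bits of the amount are never demanded.
    assert(S % 32 == (S & 31u));
    // Rotating a 0 or -1 value leaves it unchanged.
    assert(RotL(0u, S) == 0u);
    assert(RotL(~0u, S) == ~0u);
  }
  return 0;
}
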
case ISD::BITREVERSE: {
@@ -1602,7 +1755,8 @@ bool TargetLowering::SimplifyDemandedBits(
// If we only care about the highest bit, don't bother shifting right.
if (DemandedBits.isSignMask()) {
- unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0);
+ unsigned NumSignBits =
+ TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
// However if the input is already sign extended we expect the sign
// extension to be dropped altogether later and do not simplify.
@@ -1639,8 +1793,7 @@ bool TargetLowering::SimplifyDemandedBits(
// If the input sign bit is known zero, convert this into a zero extension.
if (Known.Zero[ExVTBits - 1])
- return TLO.CombineTo(
- Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT.getScalarType()));
+ return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
if (Known.One[ExVTBits - 1]) { // Input sign bit known set
@@ -1704,7 +1857,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
- Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
+ Known = Known.zext(BitWidth);
break;
}
case ISD::SIGN_EXTEND:
@@ -1777,7 +1930,12 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
- Known = Known.zext(BitWidth, false /* => any extend */);
+ Known = Known.anyext(BitWidth);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
+ Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
case ISD::TRUNCATE: {
@@ -1886,7 +2044,7 @@ bool TargetLowering::SimplifyDemandedBits(
Known = Known2;
if (BitWidth > EltBitWidth)
- Known = Known.zext(BitWidth, false /* => any extend */);
+ Known = Known.anyext(BitWidth);
break;
}
case ISD::BITCAST: {
@@ -2151,14 +2309,20 @@ bool TargetLowering::SimplifyDemandedVectorElts(
APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
bool AssumeSingleUse) const {
EVT VT = Op.getValueType();
+ unsigned Opcode = Op.getOpcode();
APInt DemandedElts = OriginalDemandedElts;
unsigned NumElts = DemandedElts.getBitWidth();
assert(VT.isVector() && "Expected vector op");
- assert(VT.getVectorNumElements() == NumElts &&
- "Mask size mismatches value type element count!");
KnownUndef = KnownZero = APInt::getNullValue(NumElts);
+ // TODO: For now we assume we know nothing about scalable vectors.
+ if (VT.isScalableVector())
+ return false;
+
+ assert(VT.getVectorNumElements() == NumElts &&
+ "Mask size mismatches value type element count!");
+
// Undef operand.
if (Op.isUndef()) {
KnownUndef.setAllBits();
@@ -2182,7 +2346,22 @@ bool TargetLowering::SimplifyDemandedVectorElts(
SDLoc DL(Op);
unsigned EltSizeInBits = VT.getScalarSizeInBits();
- switch (Op.getOpcode()) {
+ // Helper for demanding the specified elements and all the bits of both binary
+ // operands.
+ auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
+ SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
+ TLO.DAG, Depth + 1);
+ SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
+ TLO.DAG, Depth + 1);
+ if (NewOp0 || NewOp1) {
+ SDValue NewOp = TLO.DAG.getNode(
+ Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ return false;
+ };
+
+ switch (Opcode) {
case ISD::SCALAR_TO_VECTOR: {
if (!DemandedElts[0]) {
KnownUndef.setAllBits();
@@ -2234,7 +2413,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
KnownBits Known;
- if (SimplifyDemandedBits(Src, SrcDemandedBits, Known, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
+ TLO, Depth + 1))
return true;
}
@@ -2323,53 +2503,75 @@ bool TargetLowering::SimplifyDemandedVectorElts(
break;
}
case ISD::INSERT_SUBVECTOR: {
- if (!isa<ConstantSDNode>(Op.getOperand(2)))
- break;
- SDValue Base = Op.getOperand(0);
+    // Demand any elements from the subvector and the remainder from the src
+    // it's inserted into.
+ SDValue Src = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
- EVT SubVT = Sub.getValueType();
- unsigned NumSubElts = SubVT.getVectorNumElements();
- const APInt &Idx = Op.getConstantOperandAPInt(2);
- if (Idx.ugt(NumElts - NumSubElts))
- break;
- unsigned SubIdx = Idx.getZExtValue();
- APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
+ uint64_t Idx = Op.getConstantOperandVal(2);
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ APInt DemandedSrcElts = DemandedElts;
+ DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+
APInt SubUndef, SubZero;
- if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO,
+ if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
Depth + 1))
return true;
- APInt BaseElts = DemandedElts;
- BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
-
- // If none of the base operand elements are demanded, replace it with undef.
- if (!BaseElts && !Base.isUndef())
- return TLO.CombineTo(Op,
- TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
- TLO.DAG.getUNDEF(VT),
- Op.getOperand(1),
- Op.getOperand(2)));
-
- if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO,
- Depth + 1))
+
+ // If none of the src operand elements are demanded, replace it with undef.
+ if (!DemandedSrcElts && !Src.isUndef())
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+ TLO.DAG.getUNDEF(VT), Sub,
+ Op.getOperand(2)));
+
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
+ TLO, Depth + 1))
return true;
- KnownUndef.insertBits(SubUndef, SubIdx);
- KnownZero.insertBits(SubZero, SubIdx);
+ KnownUndef.insertBits(SubUndef, Idx);
+ KnownZero.insertBits(SubZero, Idx);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedSrcElts.isAllOnesValue() ||
+ !DemandedSubElts.isAllOnesValue()) {
+ SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
+ Src, DemandedSrcElts, TLO.DAG, Depth + 1);
+ SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
+ Sub, DemandedSubElts, TLO.DAG, Depth + 1);
+ if (NewSrc || NewSub) {
+ NewSrc = NewSrc ? NewSrc : Src;
+ NewSub = NewSub ? NewSub : Sub;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
+ NewSub, Op.getOperand(2));
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
break;
}
case ISD::EXTRACT_SUBVECTOR: {
+ // Offset the demanded elts by the subvector index.
SDValue Src = Op.getOperand(0);
- ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (Src.getValueType().isScalableVector())
+ break;
+ uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
- // Offset the demanded elts by the subvector index.
- uint64_t Idx = SubIdx->getZExtValue();
- APInt SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
- APInt SrcUndef, SrcZero;
- if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
- Depth + 1))
- return true;
- KnownUndef = SrcUndef.extractBits(NumElts, Idx);
- KnownZero = SrcZero.extractBits(NumElts, Idx);
+ APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
+ return true;
+ KnownUndef = SrcUndef.extractBits(NumElts, Idx);
+ KnownZero = SrcZero.extractBits(NumElts, Idx);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedElts.isAllOnesValue()) {
+ SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
+ Src, DemandedSrcElts, TLO.DAG, Depth + 1);
+ if (NewSrc) {
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
+ Op.getOperand(1));
+ return TLO.CombineTo(Op, NewOp);
+ }
}
break;
}
@@ -2538,7 +2740,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
break;
}
- // TODO: There are more binop opcodes that could be handled here - MUL, MIN,
+ // TODO: There are more binop opcodes that could be handled here - MIN,
// MAX, saturated math, etc.
case ISD::OR:
case ISD::XOR:
@@ -2549,17 +2751,26 @@ bool TargetLowering::SimplifyDemandedVectorElts(
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
APInt UndefRHS, ZeroRHS;
- if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
- ZeroRHS, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
+ Depth + 1))
return true;
APInt UndefLHS, ZeroLHS;
- if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
- ZeroLHS, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
+ Depth + 1))
return true;
KnownZero = ZeroLHS & ZeroRHS;
KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ // TODO - use KnownUndef to relax the demandedelts?
+ if (!DemandedElts.isAllOnesValue())
+ if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
+ return true;
break;
}
case ISD::SHL:
@@ -2567,27 +2778,39 @@ bool TargetLowering::SimplifyDemandedVectorElts(
case ISD::SRA:
case ISD::ROTL:
case ISD::ROTR: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
APInt UndefRHS, ZeroRHS;
- if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
- ZeroRHS, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
+ Depth + 1))
return true;
APInt UndefLHS, ZeroLHS;
- if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
- ZeroLHS, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
+ Depth + 1))
return true;
KnownZero = ZeroLHS;
KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ // TODO - use KnownUndef to relax the demandedelts?
+ if (!DemandedElts.isAllOnesValue())
+ if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
+ return true;
break;
}
case ISD::MUL:
case ISD::AND: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
APInt SrcUndef, SrcZero;
- if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
- SrcZero, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
return true;
- if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
- KnownZero, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
+ TLO, Depth + 1))
return true;
// If either side has a zero element, then the result element is zero, even
@@ -2597,6 +2820,12 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownZero |= SrcZero;
KnownUndef &= SrcUndef;
KnownUndef &= ~KnownZero;
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ // TODO - use KnownUndef to relax the demandedelts?
+ if (!DemandedElts.isAllOnesValue())
+ if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
+ return true;
break;
}
case ISD::TRUNCATE:
@@ -2661,17 +2890,16 @@ void TargetLowering::computeKnownBitsForTargetInstr(
Known.resetAll();
}
-void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
- KnownBits &Known,
- const APInt &DemandedElts,
- const SelectionDAG &DAG,
- unsigned Depth) const {
- assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex");
+void TargetLowering::computeKnownBitsForFrameIndex(
+ const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
+ // The low bits are known zero if the pointer is aligned.
+ Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
+}
- if (unsigned Align = DAG.InferPtrAlignment(Op)) {
- // The low bits are known zero if the pointer is aligned.
- Known.Zero.setLowBits(Log2_32(Align));
- }
+Align TargetLowering::computeKnownAlignForTargetInstr(
+ GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
+ unsigned Depth) const {
+ return Align(1);
}
/// This method can be implemented by targets that want to expose additional
@@ -2689,6 +2917,12 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
return 1;
}
+unsigned TargetLowering::computeNumSignBitsForTargetInstr(
+ GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
+ const MachineRegisterInfo &MRI, unsigned Depth) const {
+ return 1;
+}
+
bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
TargetLoweringOpt &TLO, unsigned Depth) const {
@@ -3788,33 +4022,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// the comparison operands is infinity or negative infinity, convert the
// condition to a less-awkward <= or >=.
if (CFP->getValueAPF().isInfinity()) {
- if (CFP->getValueAPF().isNegative()) {
- if (Cond == ISD::SETOEQ &&
- isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
- if (Cond == ISD::SETUEQ &&
- isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
- if (Cond == ISD::SETUNE &&
- isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
- if (Cond == ISD::SETONE &&
- isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
- } else {
- if (Cond == ISD::SETOEQ &&
- isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
- if (Cond == ISD::SETUEQ &&
- isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
- if (Cond == ISD::SETUNE &&
- isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
- if (Cond == ISD::SETONE &&
- isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
- return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
+ bool IsNegInf = CFP->getValueAPF().isNegative();
+ ISD::CondCode NewCond = ISD::SETCC_INVALID;
+ switch (Cond) {
+ case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
+ case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
+ case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
+ case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
+ default: break;
}
+ if (NewCond != ISD::SETCC_INVALID &&
+ isCondCodeLegal(NewCond, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, NewCond);
}
}
}
@@ -4245,10 +4464,10 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
const TargetRegisterInfo *TRI,
- ImmutableCallSite CS) const {
+ const CallBase &Call) const {
/// Information about all of the constraints.
AsmOperandInfoVector ConstraintOperands;
- const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+ const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
unsigned maCount = 0; // Largest number of multiple alternative constraints.
// Do a prepass over the constraints, canonicalizing them, and building up the
@@ -4271,25 +4490,24 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
case InlineAsm::isOutput:
// Indirect outputs just consume an argument.
if (OpInfo.isIndirect) {
- OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
break;
}
// The return value of the call is this value. As such, there is no
// corresponding argument.
- assert(!CS.getType()->isVoidTy() &&
- "Bad inline asm!");
- if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
+ if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
OpInfo.ConstraintVT =
getSimpleValueType(DL, STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
- OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType());
+ OpInfo.ConstraintVT = getSimpleValueType(DL, Call.getType());
}
++ResNo;
break;
case InlineAsm::isInput:
- OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
break;
case InlineAsm::isClobber:
// Nothing to do.
@@ -5479,251 +5697,221 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
return false;
}
-char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG,
- bool LegalOperations, bool ForCodeSize,
- unsigned Depth) const {
+SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOps, bool OptForSize,
+ NegatibleCost &Cost,
+ unsigned Depth) const {
// fneg is removable even if it has multiple uses.
- if (Op.getOpcode() == ISD::FNEG)
- return 2;
+ if (Op.getOpcode() == ISD::FNEG) {
+ Cost = NegatibleCost::Cheaper;
+ return Op.getOperand(0);
+ }
- // Don't allow anything with multiple uses unless we know it is free.
- EVT VT = Op.getValueType();
+ // Don't recurse exponentially.
+ if (Depth > SelectionDAG::MaxRecursionDepth)
+ return SDValue();
+
+ // Pre-increment recursion depth for use in recursive calls.
+ ++Depth;
const SDNodeFlags Flags = Op->getFlags();
const TargetOptions &Options = DAG.getTarget().Options;
- if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND &&
- isFPExtFree(VT, Op.getOperand(0).getValueType())))
- return 0;
+ EVT VT = Op.getValueType();
+ unsigned Opcode = Op.getOpcode();
- // Don't recurse exponentially.
- if (Depth > SelectionDAG::MaxRecursionDepth)
- return 0;
+ // Don't allow anything with multiple uses unless we know it is free.
+ if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
+ bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
+ isFPExtFree(VT, Op.getOperand(0).getValueType());
+ if (!IsFreeExtend)
+ return SDValue();
+ }
- switch (Op.getOpcode()) {
- case ISD::ConstantFP: {
- if (!LegalOperations)
- return 1;
+ SDLoc DL(Op);
+ switch (Opcode) {
+ case ISD::ConstantFP: {
// Don't invert constant FP values after legalization unless the target says
// the negated constant is legal.
- return isOperationLegal(ISD::ConstantFP, VT) ||
- isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
- ForCodeSize);
+ bool IsOpLegal =
+ isOperationLegal(ISD::ConstantFP, VT) ||
+ isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
+ OptForSize);
+
+ if (LegalOps && !IsOpLegal)
+ break;
+
+ APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
+ V.changeSign();
+ SDValue CFP = DAG.getConstantFP(V, DL, VT);
+
+ // If we already have the use of the negated floating constant, it is free
+    // to negate it even if it has multiple uses.
+ if (!Op.hasOneUse() && CFP.use_empty())
+ break;
+ Cost = NegatibleCost::Neutral;
+ return CFP;
}
case ISD::BUILD_VECTOR: {
// Only permit BUILD_VECTOR of constants.
if (llvm::any_of(Op->op_values(), [&](SDValue N) {
return !N.isUndef() && !isa<ConstantFPSDNode>(N);
}))
- return 0;
- if (!LegalOperations)
- return 1;
- if (isOperationLegal(ISD::ConstantFP, VT) &&
- isOperationLegal(ISD::BUILD_VECTOR, VT))
- return 1;
- return llvm::all_of(Op->op_values(), [&](SDValue N) {
- return N.isUndef() ||
- isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
- ForCodeSize);
- });
+ break;
+
+ bool IsOpLegal =
+ (isOperationLegal(ISD::ConstantFP, VT) &&
+ isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
+ llvm::all_of(Op->op_values(), [&](SDValue N) {
+ return N.isUndef() ||
+ isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
+ OptForSize);
+ });
+
+ if (LegalOps && !IsOpLegal)
+ break;
+
+ SmallVector<SDValue, 4> Ops;
+ for (SDValue C : Op->op_values()) {
+ if (C.isUndef()) {
+ Ops.push_back(C);
+ continue;
+ }
+ APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
+ V.changeSign();
+ Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
+ }
+ Cost = NegatibleCost::Neutral;
+ return DAG.getBuildVector(VT, DL, Ops);
}
- case ISD::FADD:
+ case ISD::FADD: {
if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
- return 0;
+ break;
// After operation legalization, it might not be legal to create new FSUBs.
- if (LegalOperations && !isOperationLegalOrCustom(ISD::FSUB, VT))
- return 0;
+ if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
+ break;
+ SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
- // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
- if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
- ForCodeSize, Depth + 1))
- return V;
- // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
- return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
- case ISD::FSUB:
+ // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
+ NegatibleCost CostX = NegatibleCost::Expensive;
+ SDValue NegX =
+ getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
+ // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
+ NegatibleCost CostY = NegatibleCost::Expensive;
+ SDValue NegY =
+ getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
+
+    // Negate the X if its cost is less than or equal to the cost of Y.
+ if (NegX && (CostX <= CostY)) {
+ Cost = CostX;
+ return DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
+ }
+
+ // Negate the Y if it is not expensive.
+ if (NegY) {
+ Cost = CostY;
+ return DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
+ }
+ break;
+ }
+ case ISD::FSUB: {
// We can't turn -(A-B) into B-A when we honor signed zeros.
if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
- return 0;
+ break;
- // fold (fneg (fsub A, B)) -> (fsub B, A)
- return 1;
+ SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
+ // fold (fneg (fsub 0, Y)) -> Y
+ if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
+ if (C->isZero()) {
+ Cost = NegatibleCost::Cheaper;
+ return Y;
+ }
+ // fold (fneg (fsub X, Y)) -> (fsub Y, X)
+ Cost = NegatibleCost::Neutral;
+ return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
+ }
case ISD::FMUL:
- case ISD::FDIV:
- // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
- if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
- ForCodeSize, Depth + 1))
- return V;
+ case ISD::FDIV: {
+ SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
+
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
+ NegatibleCost CostX = NegatibleCost::Expensive;
+ SDValue NegX =
+ getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
+ // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
+ NegatibleCost CostY = NegatibleCost::Expensive;
+ SDValue NegY =
+ getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
+
+    // Negate the X if its cost is less than or equal to the cost of Y.
+ if (NegX && (CostX <= CostY)) {
+ Cost = CostX;
+ return DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
+ }
// Ignore X * 2.0 because that is expected to be canonicalized to X + X.
if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
- return 0;
-
- return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
+ break;
+ // Negate the Y if it is not expensive.
+ if (NegY) {
+ Cost = CostY;
+ return DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
+ }
+ break;
+ }
case ISD::FMA:
case ISD::FMAD: {
if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
- return 0;
+ break;
+
+ SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
+ NegatibleCost CostZ = NegatibleCost::Expensive;
+ SDValue NegZ =
+ getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
+    // Give up if we fail to negate the Z.
+ if (!NegZ)
+ break;
// fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
+ NegatibleCost CostX = NegatibleCost::Expensive;
+ SDValue NegX =
+ getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
// fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
- char V2 = isNegatibleForFree(Op.getOperand(2), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
- if (!V2)
- return 0;
-
- // One of Op0/Op1 must be cheaply negatible, then select the cheapest.
- char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
- char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
- char V01 = std::max(V0, V1);
- return V01 ? std::max(V01, V2) : 0;
- }
-
- case ISD::FP_EXTEND:
- case ISD::FP_ROUND:
- case ISD::FSIN:
- return isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
- }
-
- return 0;
-}
-
-SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
- bool LegalOperations,
- bool ForCodeSize,
- unsigned Depth) const {
- // fneg is removable even if it has multiple uses.
- if (Op.getOpcode() == ISD::FNEG)
- return Op.getOperand(0);
+ NegatibleCost CostY = NegatibleCost::Expensive;
+ SDValue NegY =
+ getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
- assert(Depth <= SelectionDAG::MaxRecursionDepth &&
- "getNegatedExpression doesn't match isNegatibleForFree");
- const SDNodeFlags Flags = Op->getFlags();
-
- switch (Op.getOpcode()) {
- case ISD::ConstantFP: {
- APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
- V.changeSign();
- return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
- }
- case ISD::BUILD_VECTOR: {
- SmallVector<SDValue, 4> Ops;
- for (SDValue C : Op->op_values()) {
- if (C.isUndef()) {
- Ops.push_back(C);
- continue;
- }
- APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
- V.changeSign();
- Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
+    // Negate the X if its cost is less than or equal to the cost of Y.
+ if (NegX && (CostX <= CostY)) {
+ Cost = std::min(CostX, CostZ);
+ return DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
}
- return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
- }
- case ISD::FADD:
- assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
- Flags.hasNoSignedZeros()) &&
- "Expected NSZ fp-flag");
-
- // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
- if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
- Depth + 1))
- return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
- getNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1),
- Op.getOperand(1), Flags);
- // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
- return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
- getNegatedExpression(Op.getOperand(1), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1),
- Op.getOperand(0), Flags);
- case ISD::FSUB:
- // fold (fneg (fsub 0, B)) -> B
- if (ConstantFPSDNode *N0CFP =
- isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
- if (N0CFP->isZero())
- return Op.getOperand(1);
-
- // fold (fneg (fsub A, B)) -> (fsub B, A)
- return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
- Op.getOperand(1), Op.getOperand(0), Flags);
-
- case ISD::FMUL:
- case ISD::FDIV:
- // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
- if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
- Depth + 1))
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
- getNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1),
- Op.getOperand(1), Flags);
-
- // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
- return DAG.getNode(
- Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0),
- getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
- ForCodeSize, Depth + 1),
- Flags);
- case ISD::FMA:
- case ISD::FMAD: {
- assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
- Flags.hasNoSignedZeros()) &&
- "Expected NSZ fp-flag");
-
- SDValue Neg2 = getNegatedExpression(Op.getOperand(2), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
-
- char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
- char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
- // TODO: This is a hack. It is possible that costs have changed between now
- // and the initial calls to isNegatibleForFree(). That is because we
- // are rewriting the expression, and that may change the number of
- // uses (and therefore the cost) of values. If the negation costs are
- // equal, only negate this value if it is a constant. Otherwise, try
- // operand 1. A better fix would eliminate uses as a cost factor or
- // track the change in uses as we rewrite the expression.
- if (V0 > V1 || (V0 == V1 && isa<ConstantFPSDNode>(Op.getOperand(0)))) {
- // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
- SDValue Neg0 = getNegatedExpression(
- Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1);
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0,
- Op.getOperand(1), Neg2, Flags);
+ // Negate the Y if it is not expensive.
+ if (NegY) {
+ Cost = std::min(CostY, CostZ);
+ return DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
}
-
- // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
- SDValue Neg1 = getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
- ForCodeSize, Depth + 1);
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
- Op.getOperand(0), Neg1, Neg2, Flags);
+ break;
}
case ISD::FP_EXTEND:
case ISD::FSIN:
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
- getNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1));
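+ // fold (fneg (fp_extend X)) -> (fp_extend (fneg X))
+ // fold (fneg (fsin X)) -> (fsin (fneg X))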
+ if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
+ OptForSize, Cost, Depth))
+ return DAG.getNode(Opcode, DL, VT, NegV);
+ break;
case ISD::FP_ROUND:
- return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
- getNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1),
- Op.getOperand(1));
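+ // fold (fneg (fp_round X)) -> (fp_round (fneg X))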
+ if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
+ OptForSize, Cost, Depth))
+ return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
+ break;
}
- llvm_unreachable("Unknown code");
+ return SDValue();
}
//===----------------------------------------------------------------------===//
@@ -5929,6 +6117,14 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
return Ok;
}
+// Check that (every element of) Z is undef or not an exact multiple of BW.
+static bool isNonZeroModBitWidth(SDValue Z, unsigned BW) {
+ return ISD::matchUnaryPredicate(
+ Z,
+ [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
+ true);
+}
+
bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
EVT VT = Node->getValueType(0);
@@ -5939,41 +6135,54 @@ bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
!isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
return false;
- // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
- // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
SDValue X = Node->getOperand(0);
SDValue Y = Node->getOperand(1);
SDValue Z = Node->getOperand(2);
- unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ unsigned BW = VT.getScalarSizeInBits();
bool IsFSHL = Node->getOpcode() == ISD::FSHL;
SDLoc DL(SDValue(Node, 0));
EVT ShVT = Z.getValueType();
- SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
- SDValue Zero = DAG.getConstant(0, DL, ShVT);
- SDValue ShAmt;
- if (isPowerOf2_32(EltSizeInBits)) {
- SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
- ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
- } else {
+ SDValue ShX, ShY;
+ SDValue ShAmt, InvShAmt;
+ if (isNonZeroModBitWidth(Z, BW)) {
+ // fshl: X << C | Y >> (BW - C)
+ // fshr: X << (BW - C) | Y >> C
+ // where C = Z % BW is not zero
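+ // e.g. for BW = 8 and C = 3:
+ //   fshl(X, Y, 3) = (X << 3) | (Y >> 5)
+ //   fshr(X, Y, 3) = (X << 5) | (Y >> 3)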
+ SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
- }
-
- SDValue InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
- SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
- SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
- SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
-
- // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
- // and that is undefined. We must compare and select to avoid UB.
- EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShVT);
+ InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
+ ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
+ ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
+ } else {
+ // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
+ // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
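+ // Splitting the inverse shift into a shift by 1 followed by a shift by
+ // (BW - 1 - (Z % BW)) keeps every shift amount strictly below BW, so no
+ // compare-and-select is needed for the Z % BW == 0 case.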
+ SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
+ if (isPowerOf2_32(BW)) {
+ // Z % BW -> Z & (BW - 1)
+ ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
+ // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
+ InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
+ } else {
+ SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
+ ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
+ InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
+ }
- // For fshl, 0-shift returns the 1st arg (X).
- // For fshr, 0-shift returns the 2nd arg (Y).
- SDValue IsZeroShift = DAG.getSetCC(DL, CCVT, ShAmt, Zero, ISD::SETEQ);
- Result = DAG.getSelect(DL, VT, IsZeroShift, IsFSHL ? X : Y, Or);
+ SDValue One = DAG.getConstant(1, DL, ShVT);
+ if (IsFSHL) {
+ ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
+ SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
+ ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
+ } else {
+ SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
+ ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
+ ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
+ }
+ }
+ Result = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
return true;
}
@@ -5988,12 +6197,15 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
SDLoc DL(SDValue(Node, 0));
EVT ShVT = Op1.getValueType();
- SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
+ SDValue Zero = DAG.getConstant(0, DL, ShVT);
- // If a rotate in the other direction is legal, use it.
+ assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 &&
+ "Expecting the type bitwidth to be a power of 2");
+
+ // If a rotate in the other direction is supported, use it.
unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
- if (isOperationLegal(RevRot, VT)) {
- SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
+ if (isOperationLegalOrCustom(RevRot, VT)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
return true;
}
@@ -6006,15 +6218,13 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
return false;
// Otherwise,
- // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
- // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
+ // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and -c, w-1)))
+ // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and -c, w-1)))
//
- assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 &&
- "Expecting the type bitwidth to be a power of 2");
unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
- SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
+ SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0),
@@ -6198,114 +6408,50 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
- if (SrcVT.getScalarType() != MVT::i64)
+ if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
+ return false;
+
+ // Only expand vector types if we have the appropriate vector bit operations.
+ if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
+ !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
+ !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
return false;
SDLoc dl(SDValue(Node, 0));
EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
- if (DstVT.getScalarType() == MVT::f32) {
- // Only expand vector types if we have the appropriate vector bit
- // operations.
- if (SrcVT.isVector() &&
- (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
- !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
- !isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT) ||
- !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
- !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
- return false;
-
- // For unsigned conversions, convert them to signed conversions using the
- // algorithm from the x86_64 __floatundisf in compiler_rt.
-
- // TODO: This really should be implemented using a branch rather than a
- // select. We happen to get lucky and machinesink does the right
- // thing most of the time. This would be a good candidate for a
- // pseudo-op, or, even better, for whole-function isel.
- EVT SetCCVT =
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
-
- SDValue SignBitTest = DAG.getSetCC(
- dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
-
- SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
- SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
- SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
- SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
- SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
-
- SDValue Slow, Fast;
- if (Node->isStrictFPOpcode()) {
- // In strict mode, we must avoid spurious exceptions, and therefore
- // must make sure to only emit a single STRICT_SINT_TO_FP.
- SDValue InCvt = DAG.getSelect(dl, SrcVT, SignBitTest, Or, Src);
- Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DstVT, MVT::Other },
- { Node->getOperand(0), InCvt });
- Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DstVT, MVT::Other },
- { Fast.getValue(1), Fast, Fast });
- Chain = Slow.getValue(1);
- // The STRICT_SINT_TO_FP inherits the exception mode from the
- // incoming STRICT_UINT_TO_FP node; the STRICT_FADD node can
- // never raise any exception.
- SDNodeFlags Flags;
- Flags.setNoFPExcept(Node->getFlags().hasNoFPExcept());
- Fast->setFlags(Flags);
- Flags.setNoFPExcept(true);
- Slow->setFlags(Flags);
- } else {
- SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
- Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
- Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
- }
-
- Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast);
- return true;
- }
-
- if (DstVT.getScalarType() == MVT::f64) {
- // Only expand vector types if we have the appropriate vector bit
- // operations.
- if (SrcVT.isVector() &&
- (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
- !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
- !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
- !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
- !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
- return false;
-
- // Implementation of unsigned i64 to f64 following the algorithm in
- // __floatundidf in compiler_rt. This implementation has the advantage
- // of performing rounding correctly, both in the default rounding mode
- // and in all alternate rounding modes.
- SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
- SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
- BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
- SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
- SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
- SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
-
- SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
- SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
- SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
- SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
- SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
- SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
- if (Node->isStrictFPOpcode()) {
- SDValue HiSub =
- DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other},
- {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52});
- Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other},
- {HiSub.getValue(1), LoFlt, HiSub});
- Chain = Result.getValue(1);
- } else {
- SDValue HiSub =
- DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
- Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
- }
- return true;
+ // Implementation of unsigned i64 to f64 following the algorithm in
+ // __floatundidf in compiler_rt. This implementation has the advantage
+ // of performing rounding correctly, both in the default rounding mode
+ // and in all alternate rounding modes.
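+ // With Lo = Src & 0xFFFFFFFF and Hi = Src >> 32:
+ //   bitcast(0x4330000000000000 | Lo) == 2^52 + Lo
+ //   bitcast(0x4530000000000000 | Hi) == 2^84 + Hi * 2^32
+ //   (HiFlt - (2^84 + 2^52)) + LoFlt == Hi * 2^32 + Lo == Src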
+ SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
+ SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
+ BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
+ SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
+ SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
+ SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
+
+ SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
+ SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
+ SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
+ SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
+ SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
+ if (Node->isStrictFPOpcode()) {
+ SDValue HiSub =
+ DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other},
+ {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52});
+ Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other},
+ {HiSub.getValue(1), LoFlt, HiSub});
+ Chain = Result.getValue(1);
+ } else {
+ SDValue HiSub =
+ DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
+ Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
}
-
- return false;
+ return true;
}
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
@@ -6564,12 +6710,61 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SDValue Chain = LD->getChain();
SDValue BasePTR = LD->getBasePtr();
EVT SrcVT = LD->getMemoryVT();
+ EVT DstVT = LD->getValueType(0);
ISD::LoadExtType ExtType = LD->getExtensionType();
unsigned NumElem = SrcVT.getVectorNumElements();
EVT SrcEltVT = SrcVT.getScalarType();
- EVT DstEltVT = LD->getValueType(0).getScalarType();
+ EVT DstEltVT = DstVT.getScalarType();
+
+ // A vector must always be stored in memory as-is, i.e. without any padding
+ // between the elements, since various code depends on it, e.g. in the
+ // handling of a bitcast of a vector type to int, which may be done with a
+ // vector store followed by an integer load. A vector that does not have
+ // elements that are byte-sized must therefore be stored as an integer
+ // built out of the extracted vector elements.
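+ // For example, a <4 x i1> load becomes a single i8-wide extending load, and
+ // each element is then recovered with a shift, mask and truncate.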
+ if (!SrcEltVT.isByteSized()) {
+ unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
+ EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
+
+ unsigned NumSrcBits = SrcVT.getSizeInBits();
+ EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
+
+ unsigned SrcEltBits = SrcEltVT.getSizeInBits();
+ SDValue SrcEltBitMask = DAG.getConstant(
+ APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
+
+ // Load the whole vector and avoid masking off the top bits as it makes
+ // the codegen worse.
+ SDValue Load =
+ DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
+ LD->getPointerInfo(), SrcIntVT, LD->getAlignment(),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+
+ SmallVector<SDValue, 8> Vals;
+ for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
+ unsigned ShiftIntoIdx =
+ (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
+ SDValue ShiftAmount =
+ DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
+ LoadVT, SL, /*LegalTypes=*/false);
+ SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
+ SDValue Elt =
+ DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
+ SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
+
+ if (ExtType != ISD::NON_EXTLOAD) {
+ unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
+ Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
+ }
+
+ Vals.push_back(Scalar);
+ }
+
+ SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
+ return std::make_pair(Value, Load.getValue(1));
+ }
unsigned Stride = SrcEltVT.getSizeInBits() / 8;
assert(SrcEltVT.isByteSized());
@@ -6591,7 +6786,7 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
}
SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
- SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
+ SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
return std::make_pair(Value, NewChain);
}
@@ -6612,7 +6807,6 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
// The type of data as saved in memory.
EVT MemSclVT = StVT.getScalarType();
- EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
unsigned NumElem = StVT.getVectorNumElements();
// A vector must always be stored in memory as-is, i.e. without any padding
@@ -6629,7 +6823,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
- DAG.getConstant(Idx, SL, IdxVT));
+ DAG.getVectorIdxConstant(Idx, SL));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
unsigned ShiftIntoIdx =
@@ -6654,7 +6848,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
SmallVector<SDValue, 8> Stores;
for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
- DAG.getConstant(Idx, SL, IdxVT));
+ DAG.getVectorIdxConstant(Idx, SL));
SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride);
@@ -7313,12 +7507,13 @@ SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
SDValue LHS, SDValue RHS,
unsigned Scale, SelectionDAG &DAG) const {
- assert((Opcode == ISD::SDIVFIX ||
- Opcode == ISD::UDIVFIX) &&
+ assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
+ Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
"Expected a fixed point division opcode");
EVT VT = LHS.getValueType();
- bool Signed = Opcode == ISD::SDIVFIX;
+ bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
+ bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// If there is enough room in the type to upscale the LHS or downscale the
@@ -7330,7 +7525,15 @@ TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
: DAG.computeKnownBits(LHS).countMinLeadingZeros();
unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
- if (LHSLead + RHSTrail < Scale)
+ // For signed saturating operations, we need to be able to detect true integer
+ // division overflow; that is, the case of dividing MIN by -EPS. However, that
+ // is undefined behavior, and if we emit divisions that could take such values,
+ // they may cause undesired behavior (arithmetic exceptions on x86, for
+ // example).
+ // Avoid this by requiring an extra bit so that we never get this case.
+ // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
+ // signed saturating division, we need to emit a whopping 32-bit division.
+ if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
return SDValue();
unsigned LHSShift = std::min(LHSLead, Scale);
@@ -7384,8 +7587,6 @@ TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
Quot = DAG.getNode(ISD::UDIV, dl, VT,
LHS, RHS);
- // TODO: Saturation.
-
return Quot;
}
@@ -7659,3 +7860,26 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
return Res;
}
+
+bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ EVT VT = Node->getValueType(0);
+ SDLoc dl(Node);
+ bool isSigned = Node->getOpcode() == ISD::SREM;
+ unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ SDValue Dividend = Node->getOperand(0);
+ SDValue Divisor = Node->getOperand(1);
+ if (isOperationLegalOrCustom(DivRemOpc, VT)) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
+ return true;
+ } else if (isOperationLegalOrCustom(DivOpc, VT)) {
+ // X % Y -> X - (X / Y) * Y
+ SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
+ SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
+ Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
+ return true;
+ }
+ return false;
+}